GIL
global interpreter lock(cpython)
同一时刻只有一个线程运行在一个cpu上执行字节码(无法将多个线程映射到多个cpu上)
import dis


def add(a):
    """Return ``a + 1``.

    Kept deliberately tiny so that its bytecode listing is short enough to
    read instruction by instruction.
    """
    a = a + 1
    return a


# dis.dis() writes the listing to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
dis.dis(add)
GIL在某些情况下会释放
每次的结果都不一样 线程之间的安全问题
GIL会根据执行的字节码条数或者时间片主动释放
遇到IO操作时主动释放
import threading

# Shared counter that both worker functions mutate WITHOUT any locking.
total = 0


def add(iterations=1000000):
    """Increment the module-level ``total`` ``iterations`` times, unlocked.

    ``total += 1`` is a read-modify-write sequence, not a single atomic step,
    so running this concurrently with :func:`desc` can lose updates.

    Args:
        iterations: How many increments to perform.
    """
    # Places where the interpreter may hand the GIL to another thread:
    # 1. do something
    # 2. an I/O operation
    # 3. do something else
    global total
    for _ in range(iterations):
        total += 1


def desc(iterations=1000000):
    """Decrement the module-level ``total`` ``iterations`` times, unlocked.

    Args:
        iterations: How many decrements to perform.
    """
    global total
    for _ in range(iterations):
        total -= 1


if __name__ == "__main__":
    thread1 = threading.Thread(target=add)
    thread2 = threading.Thread(target=desc)
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    # Not guaranteed to be 0: the two unsynchronised loops can interleave.
    print(total)
多线程编程
早期操作系统能够调度的最小单位是进程,但进程对系统资源的消耗非常大,所以后来演变出了线程。线程依赖于进程而存在(在任务管理器中我们看到的其实是进程),现在操作系统能调度的最小单元是线程。
对于以IO操作为主的编程来说,多进程和多线程的性能差别不大,甚至多线程比多进程的性能还高,因为多线程编程更加轻量级。
简单的线程
import time
from threading import Thread


def get_detail_html(url, delay=2):
    """Simulate downloading one article detail page.

    Args:
        url: Address of the page; accepted but not used by this stub.
        delay: Seconds to sleep in place of real network I/O.
    """
    print("get detail html started")
    time.sleep(delay)
    print("get detail html end")


def get_detail_url(url, delay=4):
    """Simulate downloading one article list page.

    Args:
        url: Address of the page; accepted but not used by this stub.
        delay: Seconds to sleep in place of real network I/O.
    """
    print("get detail url started")
    time.sleep(delay)
    print("get detail url end")


if __name__ == '__main__':
    # Daemon threads are killed as soon as the main thread finishes.
    # The flag is passed to the constructor because Thread.setDaemon() is
    # deprecated since Python 3.10.
    thread1 = Thread(target=get_detail_html, args=("",), daemon=True)
    thread2 = Thread(target=get_detail_url, args=("",), daemon=True)

    start_time = time.time()
    thread1.start()
    thread2.start()

    # join() blocks the main thread until the worker has finished.
    thread1.join()
    thread2.join()

    # By default (non-daemon) child threads are NOT killed when the main
    # thread exits.
    print("last time: {}".format(time.time() - start_time))
继承Thread类(重写run方法)实现多线程
import time
import threading


def get_detail_html(url, delay=2):
    """Function-style worker: simulate downloading one detail page.

    Args:
        url: Address of the page; accepted but not used by this stub.
        delay: Seconds to sleep in place of real network I/O.
    """
    print("get detail html started")
    time.sleep(delay)
    print("get detail html end")


def get_detail_url(url, delay=4):
    """Function-style worker: simulate downloading one list page.

    Args:
        url: Address of the page; accepted but not used by this stub.
        delay: Seconds to sleep in place of real network I/O.
    """
    print("get detail url started")
    time.sleep(delay)
    print("get detail url end")


# 2. Implement multithreading by subclassing Thread and overriding run().
class GetDetailHtml(threading.Thread):
    """Thread that simulates downloading one detail page."""

    def __init__(self, name, delay=2):
        """Create the thread.

        Args:
            name: Thread name, forwarded to ``threading.Thread``.
            delay: Seconds ``run()`` sleeps in place of real network I/O.
        """
        super().__init__(name=name)
        self.delay = delay

    def run(self):
        """Body executed in the new thread once ``start()`` is called."""
        print("get detail html started")
        time.sleep(self.delay)
        print("get detail html end")


class GetDetailUrl(threading.Thread):
    """Thread that simulates downloading one list page."""

    def __init__(self, name, delay=4):
        """Create the thread.

        Args:
            name: Thread name, forwarded to ``threading.Thread``.
            delay: Seconds ``run()`` sleeps in place of real network I/O.
        """
        super().__init__(name=name)
        self.delay = delay

    def run(self):
        """Body executed in the new thread once ``start()`` is called."""
        print("get detail url started")
        time.sleep(self.delay)
        print("get detail url end")


if __name__ == "__main__":
    thread1 = GetDetailHtml("get_detail_html")
    thread2 = GetDetailUrl("get_detail_url")
    start_time = time.time()
    thread1.start()
    thread2.start()

    # Both threads are non-daemon, so they are not killed when the main
    # thread ends; join() makes the main thread wait so the elapsed time
    # printed below covers their full run.
    thread1.join()
    thread2.join()

    print("last time: {}".format(time.time() - start_time))
多线程之间的通信
使用queue
# filename: thread_queue_test.py
# Synchronise threads through a queue.
from queue import Queue
import time
import threading


def get_detail_html(queue, delay=2):
    """Consumer: loop forever, taking URLs off ``queue`` and "crawling" them.

    Every ``get()`` is paired with a ``task_done()`` so that ``queue.join()``
    in another thread can tell when all queued URLs have been handled.

    Args:
        queue: ``queue.Queue`` of detail-page URLs.
        delay: Seconds to sleep per URL in place of real network I/O.
    """
    while True:
        # Blocks until a URL is available. The URL is not used further
        # because the download itself is only simulated.
        url = queue.get()
        try:
            print("get detail html started")
            time.sleep(delay)
            print("get detail html end")
        finally:
            # Without this, queue.join() would wait forever.
            queue.task_done()


def get_detail_url(queue, delay=4, rounds=None):
    """Producer: "crawl" list pages and enqueue 20 detail URLs per round.

    Args:
        queue: ``queue.Queue`` that receives the detail-page URLs.
        delay: Seconds to sleep per round in place of real network I/O.
        rounds: Number of list pages to crawl. ``None`` (the default) keeps
            producing forever.
    """
    completed = 0
    while rounds is None or completed < rounds:
        print("get detail url started")
        time.sleep(delay)
        for i in range(20):
            # put() blocks until a slot is free;
            # put_nowait() is the non-blocking variant.
            queue.put("http://projectsedu.com/{id}".format(id=i))
        print("get detail url end")
        completed += 1


# 1. Thread communication through a shared object (the queue).
if __name__ == "__main__":
    detail_url_queue = Queue(maxsize=1000)
    start_time = time.time()

    # The producer must actually be started, otherwise the consumers block
    # on an empty queue forever. One round keeps the demo finite.
    thread_detail_url = threading.Thread(
        target=get_detail_url, args=(detail_url_queue,), kwargs={"rounds": 1}
    )
    thread_detail_url.start()

    for i in range(10):
        # Daemon consumers: their endless loops must not keep the process
        # alive once all the work is done.
        html_thread = threading.Thread(
            target=get_detail_html, args=(detail_url_queue,), daemon=True
        )
        html_thread.start()

    # Wait until everything has been queued, then block on the queue itself
    # until every queued URL has been marked done by a consumer.
    thread_detail_url.join()
    detail_url_queue.join()

    print("last time: {}".format(time.time() - start_time))
线程的同步问题
在多线程编程中必须要面对的问题
无锁不安全的原因
import dis


# Without a lock
def add1(a):
    """Increment the local name ``a``; exists only so its bytecode can be shown."""
    a += 1


def desc1(a):
    """Decrement the local name ``a``; exists only so its bytecode can be shown."""
    a -= 1


# add1, step by step:
#   1. load a          a = 0
#   2. load 1          1
#   3. +               1
#   4. store into a    a = 1
#
# desc1, step by step:
#   1. load a          a = 0
#   2. load 1          1
#   3. -               1
#   4. store into a    a = -1
#
# Each update takes several steps, so without a lock two threads updating
# the same variable can interleave between any two of them.

# dis.dis() prints the listing itself and returns None, so it is not wrapped
# in print() (that would emit an extra "None" line).
dis.dis(add1)
dis.dis(desc1)
普通锁(Lock)
用锁会影响性能,锁会引起死锁(两次获取锁,获取锁之后不释放,互相等待(a需要b的资源 b需要a的资源))
import threading
from threading import Lock

total = 0
# One lock guarding every update of ``total``.
lock = Lock()


def add(iterations=1000000):
    """Increment the module-level ``total`` ``iterations`` times under ``lock``.

    Args:
        iterations: How many increments to perform.
    """
    global total
    for _ in range(iterations):
        # ``with`` acquires the lock and always releases it, even if the
        # guarded statement raises.
        with lock:
            total += 1


def desc(iterations=1000000):
    """Decrement the module-level ``total`` ``iterations`` times under ``lock``.

    Args:
        iterations: How many decrements to perform.
    """
    global total
    for _ in range(iterations):
        with lock:
            total -= 1


if __name__ == "__main__":
    thread1 = threading.Thread(target=add)
    thread2 = threading.Thread(target=desc)
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    # Every update is serialised by the lock, so this prints 0.
    print(total)
相互等待(资源竞争)
"""
A(a、b)
    acquire (a)
    acquire (b)

B(a、b)
    acquire (b)
    acquire (a)

# 解决办法: 让B按照与A相同的顺序获取锁
B(a、b)
    acquire (a)
    acquire (b)
"""
可重入锁(RLock)
import threading
from threading import RLock

total = 0
# Re-entrant lock: the thread that holds it may acquire it again, as long as
# every acquire is matched by a release.
lock = RLock()


def add(lock, iterations=1000000):
    """Increment the module-level ``total`` while holding ``lock`` re-entrantly.

    The lock is taken twice here and a third time inside
    :func:`do_something`, all from the same thread.

    Args:
        lock: The re-entrant lock guarding ``total``.
        iterations: How many increments to perform.
    """
    global total
    for _ in range(iterations):
        # Nested ``with`` blocks keep every acquire paired with a release.
        with lock:
            with lock:
                total += 1
                do_something(lock)


def desc(iterations=1000000):
    """Decrement the module-level ``total`` under the module-level ``lock``.

    Args:
        iterations: How many decrements to perform.
    """
    global total
    for _ in range(iterations):
        with lock:
            total -= 1


def do_something(lock):
    """Acquire and release ``lock`` again; placeholder for real work.

    Args:
        lock: A re-entrant lock, possibly already held by the calling thread.
    """
    with lock:
        # do something
        pass


if __name__ == "__main__":
    # add() requires the lock as an argument; omitting ``args`` makes the
    # thread fail with TypeError before it does any work.
    thread1 = threading.Thread(target=add, args=(lock,))
    thread2 = threading.Thread(target=desc)
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    print(total)
条件变量锁(condition)
用于复杂的线程间同步
# With only a lock (no condition variable) the two speakers cannot take
# turns: the lock gives mutual exclusion but imposes no ordering.
import threading


class XiaoAi(threading.Thread):
    """Speaker that prints its two replies, each while holding the lock."""

    def __init__(self, lock):
        """Create the thread.

        Args:
            lock: Object supporting the ``with`` statement as a lock.
        """
        super().__init__(name="小爱")
        self.lock = lock

    def run(self):
        """Print both lines; nothing makes this wait for the other speaker."""
        # ``with`` releases the lock even if print() raises.
        with self.lock:
            print("{} : 在 ".format(self.name))

        with self.lock:
            print("{} : 好啊 ".format(self.name))


class TianMao(threading.Thread):
    """Speaker that prints its two prompts, each while holding the lock."""

    def __init__(self, lock):
        """Create the thread.

        Args:
            lock: Object supporting the ``with`` statement as a lock.
        """
        super().__init__(name="天猫精灵")
        self.lock = lock

    def run(self):
        """Print both lines; nothing makes this wait for the other speaker."""
        with self.lock:
            print("{} : 小爱同学 ".format(self.name))

        with self.lock:
            print("{} : 我们来对古诗吧 ".format(self.name))


if __name__ == "__main__":
    # The Condition is used purely as a lock here (wait/notify are never
    # called), which is why the lines do not alternate like a dialogue.
    cond = threading.Condition()
    xiaoai = XiaoAi(cond)
    tianmao = TianMao(cond)

    xiaoai.start()
    tianmao.start()
# Condition variable: lets the two speakers strictly alternate.
import threading


class XiaoAi(threading.Thread):
    """Responder: waits to be notified, says one line, notifies back."""

    # Replies in the order they are spoken.
    LINES = (
        "在",
        "好啊",
        "君住长江尾",
        "共饮长江水",
        "此恨何时已",
        "定不负相思意",
    )

    def __init__(self, cond, ready=None):
        """Create the thread.

        Args:
            cond: ``threading.Condition`` shared with the other speaker.
            ready: Optional ``threading.Event`` that is set once this thread
                holds ``cond`` and is about to wait. Sharing it with
                ``TianMao`` removes the dependency on start order.
        """
        super().__init__(name="小爱")
        self.cond = cond
        self.ready = ready

    def run(self):
        """Answer each prompt of the other speaker in turn."""
        # wait()/notify() may only be called while holding the condition.
        with self.cond:
            if self.ready is not None:
                # Set while the condition is held: the other speaker cannot
                # acquire it (and notify) until wait() below releases it, so
                # the first notify can no longer be lost.
                self.ready.set()
            for line in self.LINES:
                self.cond.wait()
                print("{} : {} ".format(self.name, line))
                self.cond.notify()


class TianMao(threading.Thread):
    """Initiator: says one line, notifies, then waits for the reply."""

    # Prompts in the order they are spoken.
    LINES = (
        "小爱同学",
        "我们来对古诗吧",
        "我住长江头",
        "日日思君不见君",
        "此水几时休",
        "只愿君心似我心",
    )

    def __init__(self, cond, ready=None):
        """Create the thread.

        Args:
            cond: ``threading.Condition`` shared with the other speaker.
            ready: Optional ``threading.Event``; when given, speaking is
                delayed until the other speaker has set it.
        """
        super().__init__(name="天猫精灵")
        self.cond = cond
        self.ready = ready

    def run(self):
        """Speak each prompt and wait for the reply before the next one."""
        if self.ready is not None:
            # Do not speak before the responder is listening.
            self.ready.wait()
        with self.cond:
            for line in self.LINES:
                print("{} : {} ".format(self.name, line))
                self.cond.notify()
                self.cond.wait()


if __name__ == "__main__":
    cond = threading.Condition()
    ready = threading.Event()
    xiaoai = XiaoAi(cond, ready)
    tianmao = TianMao(cond, ready)

    # Without the ``ready`` event the start order matters: the responder
    # must already be waiting when the initiator first notifies.
    # wait()/notify() may only be called inside ``with cond``.
    # A Condition involves two layers of locks: the underlying lock is
    # released when a thread calls wait(); in addition each wait() call
    # allocates a fresh lock that is put on the condition's waiter queue
    # until a notify() wakes it up.
    xiaoai.start()
    tianmao.start()