文本是《数据结构(共13篇)》专题的第 7 篇。阅读本文前,建议先阅读前面的文章:
一、什么是映射?
借助关键码直接查找数据元素并对其进行操作的数据结构就是映射,映射中的数据是以(key, value)的形式进行存储的,其中 key 为关键码对象,value 为具体的数据对象。生活中有很多地方都使用这样的数据结构来存储数据,比如:字典、车牌号、身份证等。
二、基于链表的映射实现
1️⃣ 定义Map接口
from abc import ABC, abstractmethod
class Map(ABC):
"""映射接口的抽象基类"""
@abstractmethod
def add(self, key, value):
"""向Map中添加元素"""
pass
@abstractmethod
def remove(self, key):
"""根据key值找到value,并从Map中删除"""
pass
@abstractmethod
def contains(self, key):
"""根据key值查看Map中是否包含该元素"""
pass
@abstractmethod
def get(self, key):
"""根据key值查询value值"""
pass
@abstractmethod
def set(self, key, new_value):
"""根据key值修改value值"""
pass
@abstractmethod
def get_size(self):
"""查看Map中的元素个数"""
pass
@abstractmethod
def is_empty(self):
"""查看Map是否为空"""
pass
2️⃣ 实现具体的Map类
class LinkedListMap(Map):
"""基于链表的映射实现"""
class Node:
"""内部节点类"""
def __init__(self, key=None, value=None, next_node=None):
self.key = key
self.value = value
self.next = next_node
def __str__(self):
return f"{self.key} : {self.value}"
def __init__(self):
self.dummy_head = self.Node()
self.size = 0
def add(self, key, value):
"""添加键值对"""
node = self._get_node(key)
if node is None:
self.dummy_head.next = self.Node(key, value, self.dummy_head.next)
self.size += 1
else:
node.value = value
def remove(self, key):
"""根据key删除元素并返回value"""
prev = self.dummy_head
while prev.next is not None:
if prev.next.key == key:
break
prev = prev.next
if prev.next is not None:
del_node = prev.next
prev.next = del_node.next
del_node.next = None
self.size -= 1
return del_node.value
return None
def contains(self, key):
"""检查是否包含指定key"""
return self._get_node(key) is not None
def get(self, key):
"""根据key获取value"""
node = self._get_node(key)
return node.value if node is not None else None
def set(self, key, new_value):
"""修改指定key的value"""
node = self._get_node(key)
if node is None:
raise ValueError(f"{key} doesn't exist!")
node.value = new_value
def get_size(self):
"""获取映射大小"""
return self.size
def is_empty(self):
"""检查映射是否为空"""
return self.size == 0
def _get_node(self, key):
"""通用方法:根据key查找节点"""
cur = self.dummy_head.next
while cur is not None:
if cur.key == key:
return cur
cur = cur.next
return None
3️⃣ 测试
def test_linked_list_map():
"""测试基于链表的映射"""
print("Pride and Prejudice")
words = []
if read_file("pride-and-prejudice.txt", words):
print(f"Total words: {len(words)}")
map_instance = LinkedListMap()
for word in words:
if map_instance.contains(word):
map_instance.set(word, map_instance.get(word) + 1)
else:
map_instance.add(word, 1)
print(f"Total different words: {map_instance.get_size()}")
print(f"Frequency of PRIDE: {map_instance.get('pride')}")
print(f"Frequency of PREJUDICE: {map_instance.get('prejudice')}")
print()
def read_file(filename, words):
"""读取文件内容到words列表中"""
try:
with open(filename, 'r', encoding='utf-8') as file:
for line in file:
# 简单的单词分割处理
line_words = line.strip().lower().split()
words.extend(line_words)
return True
except FileNotFoundError:
print(f"文件 {filename} 不存在")
return False
三、基于二分搜索树的映射实现
1️⃣ 实现具体的Map类
class BSTMap(Map):
"""基于二分搜索树的映射实现"""
class Node:
"""内部节点类"""
def __init__(self, key, value):
self.key = key
self.value = value
self.left = None
self.right = None
def __init__(self):
self.root = None
self.size = 0
def add(self, key, value):
"""添加键值对"""
self.root = self._add(self.root, key, value)
def remove(self, key):
"""删除指定key的元素并返回value"""
node = self._get_node(self.root, key)
if node is not None:
self.root = self._remove(self.root, key)
return node.value
return None
def contains(self, key):
"""检查是否包含指定key"""
return self._get_node(self.root, key) is not None
def get(self, key):
"""根据key获取value"""
node = self._get_node(self.root, key)
return node.value if node is not None else None
def set(self, key, new_value):
"""修改指定key的value"""
node = self._get_node(self.root, key)
if node is None:
raise ValueError(f"{key} doesn't exist!")
node.value = new_value
def get_size(self):
"""获取映射大小"""
return self.size
def is_empty(self):
"""检查映射是否为空"""
return self.size == 0
def _add(self, node, key, value):
"""向以node为根的二分搜索树中插入元素<key, value>,递归算法"""
# 递归的终止条件
if node is None:
self.size += 1
return self.Node(key, value)
# 递归调用
if key < node.key:
node.left = self._add(node.left, key, value)
elif key > node.key:
node.right = self._add(node.right, key, value)
else: # key == node.key
node.value = value
return node
def _get_node(self, node, key):
"""返回以node为根节点的二分搜索树中,key所在的节点"""
if node is None:
return None
if key == node.key:
return node
elif key < node.key:
return self._get_node(node.left, key)
else:
return self._get_node(node.right, key)
def _minimum(self, node):
"""返回以node为根的二分搜索树的最小值所在的节点"""
if node.left is None:
return node
return self._minimum(node.left)
def _remove_min(self, node):
"""删除掉以node为根的二分搜索树中的最小节点"""
if node.left is None:
right_node = node.right
node.right = None
self.size -= 1
return right_node
node.left = self._remove_min(node.left)
return node
def _remove(self, node, key):
"""删除掉以node为根的二分搜索树中值为key的节点, 递归算法"""
if node is None:
return None
if key < node.key:
node.left = self._remove(node.left, key)
return node
elif key > node.key:
node.right = self._remove(node.right, key)
return node
else: # key == node.key
# 待删除节点左子树为空的情况
if node.left is None:
right_node = node.right
node.right = None
self.size -= 1
return right_node
# 待删除节点右子树为空的情况
if node.right is None:
left_node = node.left
node.left = None
self.size -= 1
return left_node
# 待删除节点左右子树均不为空的情况
# 找到比待删除节点大的最小节点, 即待删除节点右子树的最小节点
# 用这个节点顶替待删除节点的位置
successor = self._minimum(node.right)
successor.right = self._remove_min(node.right)
successor.left = node.left
node.left = node.right = None
return successor
2️⃣ 测试
def test_bst_map():
"""测试基于二分搜索树的映射"""
print("Pride and Prejudice")
words = []
if read_file("pride-and-prejudice.txt", words):
print(f"Total words: {len(words)}")
map_instance = BSTMap()
for word in words:
if map_instance.contains(word):
map_instance.set(word, map_instance.get(word) + 1)
else:
map_instance.add(word, 1)
print(f"Total different words: {map_instance.get_size()}")
print(f"Frequency of PRIDE: {map_instance.get('pride')}")
print(f"Frequency of PREJUDICE: {map_instance.get('prejudice')}")
print()
四、两种实现方式的性能对比
1️⃣ 测试类
import time
def test_map(map_instance, filename):
"""测试映射性能的通用函数"""
start_time = time.time()
print(filename)
words = []
if read_file(filename, words):
print(f"Total words: {len(words)}")
for word in words:
if map_instance.contains(word):
map_instance.set(word, map_instance.get(word) + 1)
else:
map_instance.add(word, 1)
print(f"Total different words: {map_instance.get_size()}")
print(f"Frequency of PRIDE: {map_instance.get('pride')}")
print(f"Frequency of PREJUDICE: {map_instance.get('prejudice')}")
end_time = time.time()
return end_time - start_time
def main():
"""主测试函数"""
filename = "pride-and-prejudice.txt"
bst_map = BSTMap()
time1 = test_map(bst_map, filename)
print(f"BST Map: {time1:.6f} s")
print()
linked_list_map = LinkedListMap()
time2 = test_map(linked_list_map, filename)
print(f"Linked List Map: {time2:.6f} s")
if __name__ == "__main__":
main()
您已阅读完《数据结构(共13篇)》专题的第 7 篇。请继续阅读该专题下面的文章: