5.3.1 虚拟pci控制器
(1) 初始化
pc_init1 ==>
i440fx_init(&i440fx_state,&piix3_devfn, &isa_bus, gsi,.....); ==> i440fx_common_init
gsi为全局中断qemu_irq.
isa_bus为pc isa_bus设备对象;
下面是该函数的流程分析:
a) dev =qdev_create(NULL, "i440FX-pcihost");
s = PCI_HOST_BRIDGE(dev);
s->address_space = address_space_mem;
b = pci_bus_new(dev, NULL, pci_address_space,
address_space_io, 0);
创建虚拟pcihost, 然后创建pci bus, 该pci bus的parent为i440FX-pcihost。
b) d =pci_create_simple(b, 0, device_name);
*pi440fx_state = DO_UPCAST(PCII440FXState,dev, d);
f = *pi440fx_state;
f->system_memory = address_space_mem;
在pci bus上创建i440fx,并设置其memory
c) 在pci bus上创建pci dev piix3
piix3 = DO_UPCAST(PIIX3State, dev,
pci_create_simple_multifunction(b,-1, true, "PIIX3"));
pci bus的中断map_irq函数为pci_slot_get_pirq,
set_irq为piix3_set_irq, 数据(opaque)为piix3
pci_bus_irqs(b, piix3_set_irq,pci_slot_get_pirq, piix3, PIIX_NUM_PIRQS);
bus->route_intx_pin_to_irq = piix3_route_intx_pin_to_irq
pci_bus_set_route_irq_fn(b,piix3_route_intx_pin_to_irq);
piix3的创建
static const TypeInfo piix3_info = {
.name = "PIIX3",
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PIIX3State),
.class_init = piix3_class_init,
};
piix3_class_init ==>
k->init = piix3_initfn;
k->config_write = piix3_write_config;
d) piix3的init 为piix3_initfn==> isa_bus_new(&d->dev.qdev, pci_address_space_io(dev));
piix3中包含了一个isa_bus
piix3->pic = pic; //为gsi
*isa_bus = DO_UPCAST(ISABus, qbus, qdev_get_child_bus(&piix3->dev.qdev,"isa.0"));
e) i440fx_update_memory_mappings(f);
更新i440fx的内存映射(memory mappings)
(2) i440fx-pcihost 与 piix3
static const TypeInfo i440fx_pcihost_info = {
.name = "i440FX-pcihost",
.parent = TYPE_PCI_HOST_BRIDGE,
.instance_size = sizeof(I440FXState),
.class_init = i440fx_pcihost_class_init,
};
static int i440fx_pcihost_initfn(SysBusDevice *dev)
{
memory_region_init_io(&s->conf_mem,&pci_host_conf_le_ops, s,
"pci-conf-idx", 4);
sysbus_add_io(dev, 0xcf8,&s->conf_mem);
sysbus_init_ioports(&s->busdev,0xcf8, 4);//port 操作地址
memory_region_init_io(&s->data_mem,&pci_host_data_le_ops, s,
"pci-conf-data", 4);
sysbus_add_io(dev, 0xcfc, &s->data_mem); //port 操作地址
sysbus_init_ioports(&s->busdev,0xcfc, 4);
}
0xcf8(pci-conf-idx, 配置地址端口)和0xcfc(pci-conf-data, 配置数据端口)为pc机访问pci配置空间的端口,下面看下该接口的虚拟化
const MemoryRegionOps pci_host_conf_le_ops = {
.read = pci_host_config_read,
.write = pci_host_config_write,
.endianness = DEVICE_LITTLE_ENDIAN,
};
static uint64_t pci_host_data_read(void *opaque,
target_phys_addr_t addr, unsigned len)
{
......
val = pci_data_read(s->bus,s->config_reg | (addr & 3), len);
return val;
}
s->config_reg由0xcf8的端口虚拟化操作控制.
uint32_t pci_data_read(PCIBus*s, uint32_t addr, int len)
{
PCIDevice *pci_dev =pci_dev_find_by_addr(s, addr);
uint32_t config_addr = addr &(PCI_CONFIG_SPACE_SIZE - 1);
uint32_t val;
if (!pci_dev) {
return ~0x0;
}
val = pci_host_config_read_common(pci_dev,config_addr,
PCI_CONFIG_SPACE_SIZE, len);
return val;
}
pci_dev_find_by_addr 根据pci设备地址得到pci设备
static inline PCIDevice*pci_dev_find_by_addr(PCIBus *bus, uint32_t addr)
{
uint8_t bus_num = addr >> 16;
uint8_t devfn = addr >> 8;
return pci_find_device(bus, bus_num,devfn);
}
PCIDevice*pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
{
bus = pci_find_bus_nr(bus, bus_num);
if (!bus)
return NULL;
return bus->devices[devfn];
}
pci_create_xxx创建pci dev时要指定bus和devfn, 查找设备时即以devfn为下标索引bus->devices[]。
pci_host_config_read_common最终会调用pci_dev->config_read,
因此i440FX-pcihost的职责就是负责向pci_dev转发config的读写。
piix3的write_config为
static void piix3_write_config(PCIDevice *dev,
uint32_t address, uint32_t val, int len)
{
pci_default_write_config(dev, address, val,len);
if (ranges_overlap(address, len,PIIX_PIRQC, 4)) {
PIIX3State *piix3 = DO_UPCAST(PIIX3State,dev, dev);
int pic_irq;
pci_bus_fire_intx_routing_notifier(piix3->dev.bus);
piix3_update_irq_levels(piix3);
for (pic_irq = 0; pic_irq <PIIX_NUM_PIC_IRQS; pic_irq++) {
piix3_set_irq_pic(piix3, pic_irq);
}
}
}
piix3_set_irq_pic会调用qemu_set_irq发射中断。piix3->pic为gsi
5.3.2 pci device 虚拟化
(1) pci device 初始化
pci device的parent类型为TYPE_PCI_DEVICE, 其类型定义如下:
static TypeInfo pci_device_type_info = {
.name = TYPE_PCI_DEVICE,
.parent = TYPE_DEVICE,
.instance_size = sizeof(PCIDevice),
.abstract = true,
.class_size = sizeof(PCIDeviceClass),
.class_init = pci_device_class_init,
};
init 函数 pci_qdev_init ==>
a) do_pci_register_device
b) 为pci设备添加rom
do_pci_register_device的职责是:
a) 根据config_size分配conf用于config寄存器虚拟化
b) bus->dma_context_fn用于pci全硬件虚拟化(iommu),第7章分析
c) pci_device的标准寄存器赋初值,如vendor_id
d) pci device中断管理: pci_dev->irq = qemu_allocate_irqs(pci_set_irq, pci_dev, PCI_NUM_PINS); 对于piix3, 其irq处理函数也为pci_set_irq, 因此上一节的ide中断最终也由pci_set_irq处理。
(2) pci 地址分配
guest os运行后会为pci device分配bar address,该地址为guest物理地址(gpa)。该过程由config的虚拟化操作实现,即对pci设备config空间(d->config)的写操作。
config的虚拟化default函数为pci_default_write_config(多数设备直接使用该函数,程序员也可以自定义config_write)。
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
{
int i, was_irq_disabled =pci_irq_disabled(d);
for (i = 0; i < l; val >>= 8, ++i){
uint8_t wmask = d->wmask[addr + i];
uint8_t w1cmask = d->w1cmask[addr +i];
assert(!(wmask & w1cmask));
d->config[addr + i] =(d->config[addr + i] & ~wmask) | (val & wmask);
d->config[addr + i] &= ~(val& w1cmask);
}
if (ranges_overlap(addr, l,PCI_BASE_ADDRESS_0, 24) ||
ranges_overlap(addr, l,PCI_ROM_ADDRESS, 4) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS1,4) ||
range_covers_byte(addr, l,PCI_COMMAND))
pci_update_mappings(d);
if (range_covers_byte(addr, l,PCI_COMMAND))
pci_update_irq_disabled(d,was_irq_disabled);
msi_write_config(d, addr, val, l);
msix_write_config(d, addr, val, l);
}
当bar地址、rom地址或PCI_COMMAND寄存器被写入后,pci_update_mappings就被调用。
该函数根据bar地址来删除或建立内存区:
new_addr = pci_bar_address(d,i, r->type, r->size);
if (r->addr != PCI_BAR_UNMAPPED) {
memory_region_del_subregion(r->address_space,r->memory);
}
r->addr = new_addr;
if (r->addr != PCI_BAR_UNMAPPED) {
memory_region_add_subregion_overlap(r->address_space,
r->addr, r->memory, 1);
5.3.3 pci中断注入
(1) 基本中断处理
static void pci_set_irq(void*opaque, int irq_num, int level)
{
PCIDevice *pci_dev = opaque;
int change;
change = level - pci_irq_state(pci_dev,irq_num);
if (!change)
return;
pci_set_irq_state(pci_dev, irq_num, level);
pci_update_irq_status(pci_dev);
if (pci_irq_disabled(pci_dev))
return;
pci_change_irq_level(pci_dev, irq_num,change);
}
pci_change_irq_level ==> bus->set_irq
bus->set_irq在初始化时(pci_bus_irqs)被设为了piix3_set_irq, 其最终通过piix3_set_irq_pic把中断送到piix3->pic:
static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq)
{
qemu_set_irq(piix3->pic[pic_irq],
!!(piix3->pic_levels &
(((1ULL <<PIIX_NUM_PIRQS) - 1) <<
(pic_irq *PIIX_NUM_PIRQS))));
}
所以pci中断最终与gsi关联起来了。
(2) msi中断
x86处理器使用FSB Interrupt Message总线事务转发MSI/MSI-X中断请求。使用这种方法的优点是向CPU内核提交中断请求的同时,提交PCIe设备使用的中断向量,从而CPU不需要使用中断响应周期从寄存器中获得中断向量。
msi_init用于pci_dev初始化时初始化msi能力。
msi_notify用于发射中断: 从config空间读出msi的address和data, 再把data写入该gpa地址。
void msi_notify(PCIDevice*dev, unsigned int vector)
{
.......
if (msi64bit) {
address = pci_get_quad(dev->config +msi_address_lo_off(dev));
} else {
address = pci_get_long(dev->config +msi_address_lo_off(dev));
}
data = pci_get_word(dev->config +msi_data_off(dev, msi64bit));
if (nr_vectors > 1) {
data &= ~(nr_vectors - 1);
data |= vector;
}
stl_le_phys(address, data); //将数据写入gpa address, 注入msi中断
}