知识图谱是一种结构化的知识表示方式,通过实体和关系描述现实世界的知识。WeKnora 集成了知识图谱能力,用于增强检索效果。
传统 RAG: 用户问题 → 向量检索 → 相关文档 → 生成答案
GraphRAG: 用户问题 → 实体识别 → 图谱查询 → 关系扩展 → 增强检索 → 生成答案
| 优势 | 说明 |
|---|---|
| 关系推理 | 发现文档间的隐含关系 |
| 多跳问答 | 支持需要多步推理的问题 |
| 实体消歧 | 区分同名不同实体 |
| 知识补全 | 推断缺失的关系 |
文档内容
│
▼
┌─────────────────────────────────────┐
│ 实体抽取 │
│ (LLM 并发提取) │
└─────────────────────────────────────┘
│
▼
┌─────────────────────────────────────┐
│ 关系抽取 │
│ (LLM 批量提取) │
└─────────────────────────────────────┘
│
▼
┌─────────────────────────────────────┐
│ 权重计算 │
│ (PMI + Strength) │
└─────────────────────────────────────┘
│
▼
┌─────────────────────────────────────┐
│ Chunk 图谱 │
│ (文档块关系网络) │
└─────────────────────────────────────┘
「源码路径」: WeKnora/internal/types/graph.go
// Entity is a knowledge-graph node extracted from document chunks.
type Entity struct {
// ID identifies the entity; extractEntities assigns a UUID the first time a title is seen.
ID string
// ChunkIDs lists the IDs of the chunks this entity was extracted from.
ChunkIDs []string
// Title is the entity name; extractEntities uses it as the de-duplication key.
Title string `json:"title"`
// Type is read from the "type" field of the LLM JSON response.
Type string `json:"type"`
// Description is read from the "description" field of the LLM JSON response.
Description string `json:"description"`
}
// Relationship is a directed edge between two entities, identified by the
// titles of its endpoints.
//
// ID, ChunkIDs and CombinedDegree are bookkeeping fields maintained by the
// graph builder. They are tagged `json:"-"` so they are neither sent to nor
// accepted from the LLM, which keeps the JSON shape of the struct unchanged.
type Relationship struct {
	// ID is assigned by extractRelationships when the relationship is first stored.
	ID string `json:"-"`
	// ChunkIDs lists the chunks that support this relationship; its length is
	// the relationship frequency used by calculateWeights.
	ChunkIDs []string `json:"-"`
	// CombinedDegree is copied into ChunkRelation.Degree by buildChunkGraph.
	CombinedDegree int `json:"-"`

	// Source is the title of the entity the relationship starts from.
	Source string `json:"source"`
	// Target is the title of the entity the relationship points to.
	Target string `json:"target"`
	// Description is the free-text description returned by the LLM.
	Description string `json:"description"`
	// Weight is the final score computed by calculateWeights.
	Weight float64
	// Strength is the LLM-reported strength; calculateWeights divides it by 10.
	Strength int `json:"strength"`
}
// GraphBuilder builds a knowledge graph from document chunks and answers
// related-chunk queries against it.
type GraphBuilder interface {
// BuildGraph builds the graph from the given chunks and reports any failure.
BuildGraph(ctx context.Context, chunks []*Chunk) error
// GetRelationChunks returns the IDs of chunks related to chunkID, limited by topK.
GetRelationChunks(chunkID string, topK int) []string
}
「源码路径」: WeKnora/internal/application/service/graph.go
type graphBuilder struct {
entityMap map[string]*types.Entity
relationshipMap map[string]*types.Relationship
chatModel chat.Chat
chunkGraph map[string]map[string]*ChunkRelation
mutex sync.RWMutex
}
func NewGraphBuilder(config *config.Config, chatModel chat.Chat) types.GraphBuilder {
return &graphBuilder{
chatModel: chatModel,
entityMap: make(map[string]*types.Entity),
relationshipMap: make(map[string]*types.Relationship),
chunkGraph: make(map[string]map[string]*ChunkRelation),
}
}
func (b *graphBuilder) extractEntities(ctx context.Context, chunk *types.Chunk) ([]*types.Entity, error) {
messages := []chat.Message{
{Role: "system", Content: b.config.Conversation.ExtractEntitiesPrompt},
{Role: "user", Content: chunk.Content},
}
resp, err := b.chatModel.Chat(ctx, messages, &chat.ChatOptions{Temperature: 0.1})
if err != nil {
returnnil, fmt.Errorf("LLM entity extraction failed: %w", err)
}
var extractedEntities []*types.Entity
if err := common.ParseLLMJsonResponse(resp.Content, &extractedEntities); err != nil {
returnnil, fmt.Errorf("failed to parse entity extraction response: %w", err)
}
// 处理实体去重和合并
b.mutex.Lock()
defer b.mutex.Unlock()
var entities []*types.Entity
for _, entity := range extractedEntities {
if entity.Title == "" { continue }
if existEntity, exists := b.entityMapByTitle[entity.Title]; !exists {
entity.ID = uuid.New().String()
entity.ChunkIDs = []string{chunk.ID}
b.entityMapByTitle[entity.Title] = entity
b.entityMap[entity.ID] = entity
entities = append(entities, entity)
} else {
existEntity.ChunkIDs = append(existEntity.ChunkIDs, chunk.ID)
entities = append(entities, existEntity)
}
}
return entities, nil
}
WeKnora 在实体抽取时自动进行去重合并:
entityMapByTitle 快速查找重复实体
// extractRelationships asks the chat model for the relationships between the
// given entities within the merged text of chunks, and merges the result into
// relationshipMap.
//
// Relationships are keyed by "source#target". A new key is stored with a fresh
// UUID; an existing key only gains the chunk IDs it does not already have.
// An error is returned when the entities cannot be serialized, the LLM call
// fails, or its response cannot be parsed.
func (b *graphBuilder) extractRelationships(ctx context.Context, chunks []*types.Chunk, entities []*types.Entity) error {
	entitiesJSON, err := json.Marshal(entities)
	if err != nil {
		return fmt.Errorf("failed to marshal entities: %w", err)
	}
	content := b.mergeChunkContents(chunks)

	messages := []chat.Message{
		{Role: "system", Content: b.config.Conversation.ExtractRelationshipsPrompt},
		{Role: "user", Content: fmt.Sprintf("Entities: %s\n\nText: %s", string(entitiesJSON), content)},
	}
	resp, err := b.chatModel.Chat(ctx, messages, &chat.ChatOptions{Temperature: 0.1})
	if err != nil {
		return fmt.Errorf("LLM relationship extraction failed: %w", err)
	}

	var extractedRelationships []*types.Relationship
	if err := common.ParseLLMJsonResponse(resp.Content, &extractedRelationships); err != nil {
		return fmt.Errorf("failed to parse relationship extraction response: %w", err)
	}

	// De-duplicate and merge under the write lock.
	b.mutex.Lock()
	defer b.mutex.Unlock()

	for _, relationship := range extractedRelationships {
		// A JSON `null` array element decodes to a nil pointer.
		if relationship == nil {
			continue
		}

		key := fmt.Sprintf("%s#%s", relationship.Source, relationship.Target)
		relationChunkIDs := b.findRelationChunkIDs(relationship.Source, relationship.Target, entities)

		existingRel, known := b.relationshipMap[key]
		if !known {
			relationship.ID = uuid.New().String()
			relationship.ChunkIDs = relationChunkIDs
			b.relationshipMap[key] = relationship
			continue
		}

		// Merge without duplicates: len(ChunkIDs) is the relationship
		// frequency used by calculateWeights.
		for _, id := range relationChunkIDs {
			if !slices.Contains(existingRel.ChunkIDs, id) {
				existingRel.ChunkIDs = append(existingRel.ChunkIDs, id)
			}
		}
	}
	return nil
}
WeKnora 使用 「PMI(点互信息)+ Strength」 混合计算关系权重:
// calculateWeights scores every relationship by blending its pointwise mutual
// information (PMI) with the LLM-reported strength.
//
// An entity's frequency is the number of chunks it appears in, and a
// relationship's frequency is the number of chunks supporting it. All three
// probabilities share one denominator: the sum of all entity frequencies.
// Relationships whose endpoints or own frequency are zero keep their weight.
func (b *graphBuilder) calculateWeights(ctx context.Context) {
	occurrencesByTitle := make(map[string]int)
	var occurrenceTotal int
	for _, e := range b.entityMap {
		n := len(e.ChunkIDs)
		occurrencesByTitle[e.Title] = n
		occurrenceTotal += n
	}

	for _, r := range b.relationshipMap {
		srcCount := occurrencesByTitle[r.Source]
		dstCount := occurrencesByTitle[r.Target]
		pairCount := len(r.ChunkIDs)
		if srcCount <= 0 || dstCount <= 0 || pairCount <= 0 {
			continue
		}

		pSrc := float64(srcCount) / float64(occurrenceTotal)
		pDst := float64(dstCount) / float64(occurrenceTotal)
		pPair := float64(pairCount) / float64(occurrenceTotal)

		// PMI = log2(P(x,y) / (P(x) * P(y))), clamped so it is never negative.
		pmi := math.Max(math.Log2(pPair/(pSrc*pDst)), 0)

		// Final weight: 60% PMI plus 40% of strength/10, scaled by 9 and offset by 1.
		r.Weight = 1.0 + (pmi*0.6+float64(r.Strength)*0.4/10.0)*9.0
	}
}
WeKnora 基于实体关系构建 「文档块关系图谱」,用于检索时的关系扩展。
// ChunkRelation is an edge of the chunk graph.
type ChunkRelation struct {
// Weight is the edge weight; buildChunkGraph copies it from the relationship's Weight.
Weight float64
// Degree is copied by buildChunkGraph from the relationship's CombinedDegree.
Degree int
}
// buildChunkGraph derives the chunk-to-chunk graph from the entity
// relationships: for every relationship, each chunk containing the source
// entity is linked in both directions to each chunk containing the target
// entity. Relationships whose endpoints are not registered entities are skipped.
func (b *graphBuilder) buildChunkGraph(ctx context.Context) {
	for _, rel := range b.relationshipMap {
		sourceEntity := b.entityMapByTitle[rel.Source]
		targetEntity := b.entityMapByTitle[rel.Target]
		if sourceEntity == nil || targetEntity == nil {
			continue
		}

		for _, sourceChunkID := range sourceEntity.ChunkIDs {
			if _, exists := b.chunkGraph[sourceChunkID]; !exists {
				b.chunkGraph[sourceChunkID] = make(map[string]*ChunkRelation)
			}

			for _, targetChunkID := range targetEntity.ChunkIDs {
				// The reverse edge is stored under the target chunk, whose
				// adjacency map may not exist yet; writing to a nil map panics.
				if _, exists := b.chunkGraph[targetChunkID]; !exists {
					b.chunkGraph[targetChunkID] = make(map[string]*ChunkRelation)
				}

				relation := &ChunkRelation{
					Weight: rel.Weight,
					Degree: rel.CombinedDegree,
				}
				// Both directions share the same relation value.
				b.chunkGraph[sourceChunkID][targetChunkID] = relation
				b.chunkGraph[targetChunkID][sourceChunkID] = relation
			}
		}
	}
}
// GetRelationChunks returns the IDs of the chunks directly linked to chunkID
// in the chunk graph, ordered by descending edge weight, with equal weights
// ordered by chunk ID so the result is deterministic. At most topK IDs are
// returned; a non-positive topK or an unknown chunkID yields an empty slice.
func (b *graphBuilder) GetRelationChunks(chunkID string, topK int) []string {
	b.mutex.RLock()
	defer b.mutex.RUnlock()

	type weightedChunk struct {
		id     string
		weight float64
	}

	neighbors := b.chunkGraph[chunkID]
	weightedChunks := make([]weightedChunk, 0, len(neighbors))
	for relationChunkID, relation := range neighbors {
		weightedChunks = append(weightedChunks, weightedChunk{
			id:     relationChunkID,
			weight: relation.Weight,
		})
	}

	// slices.SortFunc needs a strict weak ordering, so the comparator must
	// return 0 for equivalent elements.
	slices.SortFunc(weightedChunks, func(x, y weightedChunk) int {
		switch {
		case x.weight > y.weight:
			return -1
		case x.weight < y.weight:
			return 1
		case x.id < y.id:
			return -1
		case x.id > y.id:
			return 1
		}
		return 0
	})

	// Clamp at zero so a negative topK cannot reach make.
	resultCount := max(0, min(topK, len(weightedChunks)))
	chunks := make([]string, resultCount)
	for i := range chunks {
		chunks[i] = weightedChunks[i].id
	}
	return chunks
}
// GetIndirectRelationChunks returns the IDs of chunks reachable from chunkID
// in exactly two hops, excluding chunkID itself and its direct neighbours.
//
// A two-hop path is scored as the product of its two edge weights times a 0.5
// decay factor; when several paths reach the same chunk the highest score is
// kept. Results are ordered by descending score, with equal scores ordered by
// chunk ID, and at most topK IDs are returned. A non-positive topK yields an
// empty slice.
func (b *graphBuilder) GetIndirectRelationChunks(chunkID string, topK int) []string {
	b.mutex.RLock()
	defer b.mutex.RUnlock()

	// Chunks that must not be reported as indirect: the start chunk and its
	// direct neighbours.
	directChunks := make(map[string]struct{})
	directChunks[chunkID] = struct{}{}
	for directChunkID := range b.chunkGraph[chunkID] {
		directChunks[directChunkID] = struct{}{}
	}

	// Collect two-hop chunks, keeping the best score per chunk.
	indirectChunkMap := make(map[string]*ChunkRelation)
	for directChunkID, directRelation := range b.chunkGraph[chunkID] {
		for indirectChunkID, indirectRelation := range b.chunkGraph[directChunkID] {
			if _, isDirect := directChunks[indirectChunkID]; isDirect {
				continue
			}
			// Weight decay: a two-hop path counts for half the product of its edges.
			combinedWeight := directRelation.Weight * indirectRelation.Weight * 0.5
			if existingRel, exists := indirectChunkMap[indirectChunkID]; !exists ||
				combinedWeight > existingRel.Weight {
				indirectChunkMap[indirectChunkID] = &ChunkRelation{Weight: combinedWeight}
			}
		}
	}

	type weightedChunk struct {
		id     string
		weight float64
	}
	weightedChunks := make([]weightedChunk, 0, len(indirectChunkMap))
	for indirectChunkID, relation := range indirectChunkMap {
		weightedChunks = append(weightedChunks, weightedChunk{
			id:     indirectChunkID,
			weight: relation.Weight,
		})
	}

	// Same ordering as the direct query: weight descending, then ID.
	slices.SortFunc(weightedChunks, func(x, y weightedChunk) int {
		switch {
		case x.weight > y.weight:
			return -1
		case x.weight < y.weight:
			return 1
		case x.id < y.id:
			return -1
		case x.id > y.id:
			return 1
		}
		return 0
	})

	resultCount := max(0, min(topK, len(weightedChunks)))
	chunks := make([]string, resultCount)
	for i := range chunks {
		chunks[i] = weightedChunks[i].id
	}
	return chunks
}
「源码路径」: WeKnora/internal/application/service/graph.go
func (b *graphBuilder) BuildGraph(ctx context.Context, chunks []*types.Chunk) error {
// 1. 并发提取实体
chunkEntities := make([][]*types.Entity, len(chunks))
g, gctx := errgroup.WithContext(ctx)
g.SetLimit(4) // 限制并发数为 4
for i, chunk := range chunks {
i, chunk := i, chunk
g.Go(func() error {
entities, err := b.extractEntities(gctx, chunk)
if err != nil {
return fmt.Errorf("entity extraction failed for chunk %s: %w", chunk.ID, err)
}
chunkEntities[i] = entities
returnnil
})
}
if err := g.Wait(); err != nil {
return fmt.Errorf("entity extraction process failed: %w", err)
}
// 2. 批量处理关系抽取
relationBatchSize := 5// 每批处理 5 个 chunks
for i, batchChunks := range utils.ChunkSlice(chunks, relationBatchSize) {
start := i * relationBatchSize
end := min(start + relationBatchSize, len(chunkEntities))
// 合并当前批次的所有实体
relationUseEntities := make([]*types.Entity, 0)
for j := start; j < end; j++ {
relationUseEntities = append(relationUseEntities, chunkEntities[j]...)
}
iflen(relationUseEntities) >= 2 {
b.extractRelationships(ctx, batchChunks, relationUseEntities)
}
}
// 3. 计算关系权重
b.calculateWeights(ctx)
// 4. 构建 Chunk 图谱
b.buildChunkGraph(ctx)
returnnil
}
「源码路径」: WeKnora/internal/agent/tools/query_knowledge_graph.go
// QueryKnowledgeGraphTool is the agent tool registered as "query_knowledge_graph".
type QueryKnowledgeGraphTool struct {
// BaseTool is embedded; the constructor builds it from the tool name and description.
BaseTool
// knowledgeService is used by Execute to load knowledge bases and run hybrid searches.
knowledgeService interfaces.KnowledgeBaseService
}
// NewQueryKnowledgeGraphTool returns the "query_knowledge_graph" tool bound to
// the given knowledge-base service.
func NewQueryKnowledgeGraphTool(knowledgeService interfaces.KnowledgeBaseService) *QueryKnowledgeGraphTool {
	const (
		toolName        = "query_knowledge_graph"
		toolDescription = `Query knowledge graph to explore entity relationships and knowledge networks.`
	)

	tool := new(QueryKnowledgeGraphTool)
	tool.BaseTool = NewBaseTool(toolName, toolDescription)
	tool.knowledgeService = knowledgeService
	return tool
}
func (t *QueryKnowledgeGraphTool) Execute(ctx context.Context, args map[string]interface{}) (*types.ToolResult, error) {
kbIDsRaw, ok := args["knowledge_base_ids"].([]interface{})
if !ok || len(kbIDsRaw) == 0 {
return &types.ToolResult{Success: false, Error: "knowledge_base_ids is required"}, nil
}
query, ok := args["query"].(string)
if !ok || query == "" {
return &types.ToolResult{Success: false, Error: "query is required"}, nil
}
// 并发查询所有知识库
var wg sync.WaitGroup
var mu sync.Mutex
kbResults := make(map[string]*graphQueryResult)
searchParams := types.SearchParams{QueryText: query, MatchCount: 10}
for _, kbID := range kbIDs {
wg.Add(1)
gofunc(id string) {
defer wg.Done()
kb, err := t.knowledgeService.GetKnowledgeBaseByID(ctx, id)
if err != nil {
mu.Lock()
kbResults[id] = &graphQueryResult{kbID: id, err: err}
mu.Unlock()
return
}
results, err := t.knowledgeService.HybridSearch(ctx, id, searchParams)
mu.Lock()
kbResults[id] = &graphQueryResult{kbID: id, kb: kb, results: results, err: err}
mu.Unlock()
}(kbID)
}
wg.Wait()
// 处理结果...
}
WeKnora 支持生成 Mermaid 格式的知识图谱可视化图:
// generateKnowledgeGraphDiagram renders the knowledge graph as a Mermaid
// "graph TD" diagram wrapped in a fenced code block.
//
// Entities become nodes E0, E1, ... in descending frequency order, where the
// frequency is the number of chunks an entity appears in (the same definition
// calculateWeights uses). Relationships become edges in descending weight
// order, and a strength above 7 selects the thick "==>" link. Edges whose
// endpoints are not among the rendered entities are skipped.
func (b *graphBuilder) generateKnowledgeGraphDiagram(ctx context.Context) string {
	var sb strings.Builder
	sb.WriteString("```mermaid\ngraph TD\n")

	// Entities, most frequent first. The comparator returns 0 for ties, as
	// slices.SortFunc requires.
	entities := b.GetAllEntities()
	slices.SortFunc(entities, func(x, y *types.Entity) int {
		return len(y.ChunkIDs) - len(x.ChunkIDs)
	})

	// Relationships, heaviest first.
	relationships := b.GetAllRelationships()
	slices.SortFunc(relationships, func(x, y *types.Relationship) int {
		switch {
		case x.Weight > y.Weight:
			return -1
		case x.Weight < y.Weight:
			return 1
		}
		return 0
	})

	// A double quote would end a node label and a pipe would end an edge
	// label, so both are written as Mermaid entity codes.
	labelEscaper := strings.NewReplacer(`"`, "#quot;", "|", "#124;")

	// Map every entity title to its Mermaid node ID and emit the nodes.
	nodeIDs := make(map[string]string, len(entities))
	for i, entity := range entities {
		nodeID := fmt.Sprintf("E%d", i)
		nodeIDs[entity.Title] = nodeID
		fmt.Fprintf(&sb, " %s[\"%s\"]\n", nodeID, labelEscaper.Replace(entity.Title))
	}

	// Emit the edges.
	for _, rel := range relationships {
		sourceID, hasSource := nodeIDs[rel.Source]
		targetID, hasTarget := nodeIDs[rel.Target]
		if !hasSource || !hasTarget {
			// An edge with an empty node ID would not be valid Mermaid.
			continue
		}

		linkStyle := "-->"
		if rel.Strength > 7 {
			linkStyle = "==>" // strong relationship
		}

		if rel.Description == "" {
			// Omit the label rather than emit an empty "||" pair.
			fmt.Fprintf(&sb, " %s %s %s\n", sourceID, linkStyle, targetID)
			continue
		}
		fmt.Fprintf(&sb, " %s %s|%s| %s\n",
			sourceID, linkStyle, labelEscaper.Replace(rel.Description), targetID)
	}

	sb.WriteString("```\n")
	return sb.String()
}
「可视化示例」:
graph TD
classDef entity fill:#f9f,stroke:#333,stroke-width:1px;
classDef highFreq fill:#bbf,stroke:#333,stroke-width:2px;
subgraph 子图1
E0["RAG"]
E1["向量检索"]
E2["LLM"]
E0 ==>|使用| E1
E0 ==>|调用| E2
E1 -->|生成| E2
end
class E0 highFreq;
class E1 entity;
class E2 highFreq;
WeKnora 的知识图谱模块具有以下特点: