diff --git a/docker-compose-mysql.yml b/docker-compose-mysql.yml index 3b5983f8..09f2d72a 100644 --- a/docker-compose-mysql.yml +++ b/docker-compose-mysql.yml @@ -51,18 +51,33 @@ ports: - "8090:8080" + koalawiki-web: image: crpi-j9ha7sxwhatgtvj4.cn-shenzhen.personal.cr.aliyuncs.com/koala-ai/koala-wiki-web command: ["/app/start.sh"] environment: - NEXT_PUBLIC_API_URL=http://koalawiki:8080 # 用于提供给server的地址 build: - context: . - dockerfile: web/Dockerfile + context: ./web + dockerfile: Dockerfile + + nginx: # 需要nginx将前端和后端代理到一个端口 + image: crpi-j9ha7sxwhatgtvj4.cn-shenzhen.personal.cr.aliyuncs.com/koala-ai/nginx:alpine + ports: + - 8090:80 + volumes: + - ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf depends_on: - koalawiki - ports: - - "3000:3000" + - koalawiki-web + + aspire-dashboard: + image: mcr.microsoft.com/dotnet/aspire-dashboard + container_name: aspire-dashboard + restart: always + environment: + - TZ=Asia/Shanghai + - Dashboard:ApplicationName=Aspire volumes: mysql_data: diff --git a/src/KoalaWiki/Git/GitService.cs b/src/KoalaWiki/Git/GitService.cs index 087483c5..154da190 100644 --- a/src/KoalaWiki/Git/GitService.cs +++ b/src/KoalaWiki/Git/GitService.cs @@ -1,5 +1,6 @@ using System.ComponentModel; using KoalaWiki.Core; +using KoalaWiki.Options; using LibGit2Sharp; namespace KoalaWiki.Git; @@ -43,18 +44,26 @@ public static (List commits, string Sha) PullRepository( string userName = "", string password = "") { + var fetchOptions = new FetchOptions() + { + CertificateCheck = (_, _, _) => true, + CredentialsProvider = (_url, _user, _cred) => + new UsernamePasswordCredentials + { + Username = userName, + Password = password + } + }; + + // 设置代理 + if (!string.IsNullOrEmpty(DocumentOptions.Proxy)) + { + fetchOptions.ProxyOptions.Url = DocumentOptions.Proxy; + } + var pullOptions = new PullOptions { - FetchOptions = new FetchOptions() - { - CertificateCheck = (_, _, _) => true, - CredentialsProvider = (_url, _user, _cred) => - new UsernamePasswordCredentials - 
{ - Username = userName, - Password = password - } - } + FetchOptions = fetchOptions }; // 先克隆 @@ -107,13 +116,20 @@ public static GitRepositoryInfo CloneRepository( { var (localPath, organization) = GetRepositoryPath(repositoryUrl); - var cloneOptions = new CloneOptions + var fetchOptions = new FetchOptions + { + CertificateCheck = (_, _, _) => true, + Depth = 0, + }; + + // 设置代理 + if (!string.IsNullOrEmpty(DocumentOptions.Proxy)) + { + fetchOptions.ProxyOptions.Url = DocumentOptions.Proxy; + } + + var cloneOptions = new CloneOptions(fetchOptions) { - FetchOptions = - { - CertificateCheck = (_, _, _) => true, - Depth = 0, - }, BranchName = branch }; @@ -149,24 +165,31 @@ public static GitRepositoryInfo CloneRepository( // 删除目录以后在尝试一次 Directory.Delete(localPath, true); - cloneOptions = new CloneOptions + var retryFetchOptions = new FetchOptions { - BranchName = branch, - FetchOptions = + Depth = 0, + CertificateCheck = (_, _, _) => true, + CredentialsProvider = (_url, _user, _cred) => { - Depth = 0, - CertificateCheck = (_, _, _) => true, - CredentialsProvider = (_url, _user, _cred) => + return new UsernamePasswordCredentials { - return new UsernamePasswordCredentials - { - Username = userName, // 对于Token认证,Username可以随便填 - Password = password - }; - } + Username = userName, // 对于Token认证,Username可以随便填 + Password = password + }; } }; + // 设置代理 + if (!string.IsNullOrEmpty(DocumentOptions.Proxy)) + { + retryFetchOptions.ProxyOptions.Url = DocumentOptions.Proxy; + } + + cloneOptions = new CloneOptions(retryFetchOptions) + { + BranchName = branch, + }; + Repository.Clone(repositoryUrl, localPath, cloneOptions); // 获取当前仓库的git分支 using var repo = new Repository(localPath); @@ -195,24 +218,31 @@ public static GitRepositoryInfo CloneRepository( { var info = Directory.CreateDirectory(localPath); - cloneOptions = new CloneOptions + var authFetchOptions = new FetchOptions { - BranchName = branch, - FetchOptions = + Depth = 0, + CertificateCheck = (_, _, _) => true, + 
CredentialsProvider = (_url, _user, _cred) => { - Depth = 0, - CertificateCheck = (_, _, _) => true, - CredentialsProvider = (_url, _user, _cred) => + return new UsernamePasswordCredentials { - return new UsernamePasswordCredentials - { - Username = userName, // 对于Token认证,Username可以随便填 - Password = password - }; - } + Username = userName, // 对于Token认证,Username可以随便填 + Password = password + }; } }; + // 设置代理 + if (!string.IsNullOrEmpty(DocumentOptions.Proxy)) + { + authFetchOptions.ProxyOptions.Url = DocumentOptions.Proxy; + } + + cloneOptions = new CloneOptions(authFetchOptions) + { + BranchName = branch, + }; + Repository.Clone(repositoryUrl, localPath, cloneOptions); } diff --git a/src/KoalaWiki/Infrastructure/DocumentsHelper.cs b/src/KoalaWiki/Infrastructure/DocumentsHelper.cs index 7408cee5..abefa8d8 100644 --- a/src/KoalaWiki/Infrastructure/DocumentsHelper.cs +++ b/src/KoalaWiki/Infrastructure/DocumentsHelper.cs @@ -133,9 +133,11 @@ public static async Task ReadMeFile(string path) "qwen2.5-coder-3b-instruct" => 65535, "qwen3-235b-a22b" => 16384, "claude-sonnet-4-20250514" => 63999, - "gemini-2.5-pro-preview-05-06" => 32768, - "gemini-2.5-flash-preview-04-17" => 32768, + "gemini-2.5-pro" => 32768, + "gemini-2.5-flash" => 32768, "Qwen3-32B" => 32768, + "glm-4.5" => 32768, + "glm-4.5v" => 32768, "deepseek-r1:32b-qwen-distill-fp16" => 32768, _ => null }; @@ -147,20 +149,22 @@ public static async Task ReadMeFile(string path) /// public static string[] GetIgnoreFiles(string path) { + var ignoreFiles = new List(); + var ignoreFilePath = Path.Combine(path, ".gitignore"); if (File.Exists(ignoreFilePath)) { // 需要去掉注释 var lines = File.ReadAllLines(ignoreFilePath); - var ignoreFiles = lines.Where(x => !string.IsNullOrWhiteSpace(x) && !x.StartsWith("#")) - .Select(x => x.Trim()).ToList(); - - ignoreFiles.AddRange(DocumentOptions.ExcludedFiles); - - return ignoreFiles.ToArray(); + ignoreFiles.AddRange(lines.Where(x => !string.IsNullOrWhiteSpace(x) && !x.StartsWith("#")) + 
.Select(x => x.Trim())); } - return []; + // 始终添加配置的排除文件和文件夹 + ignoreFiles.AddRange(DocumentOptions.ExcludedFiles); + ignoreFiles.AddRange(DocumentOptions.ExcludedFolders); + + return ignoreFiles.ToArray(); } public static List GetCatalogueFiles(string path) diff --git a/src/KoalaWiki/KernelFactory.cs b/src/KoalaWiki/KernelFactory.cs index 4a9519df..023c6a5f 100644 --- a/src/KoalaWiki/KernelFactory.cs +++ b/src/KoalaWiki/KernelFactory.cs @@ -26,7 +26,7 @@ public static Kernel GetKernel(string chatEndpoint, string apiKey, string gitPath, string model, bool isCodeAnalysis = true, - List? files = null) + List? files = null, Action? kernelBuilderAction = null) { using var activity = Activity.Current?.Source.StartActivity(); activity?.SetTag("model", model); @@ -99,7 +99,7 @@ public static Kernel GetKernel(string chatEndpoint, } // 添加文件函数 - var fileFunction = new FileFunction(gitPath,files); + var fileFunction = new FileFunction(gitPath, files); kernelBuilder.Plugins.AddFromObject(fileFunction); kernelBuilder.Plugins.AddFromType(); activity?.SetTag("plugins.file_function", "loaded"); @@ -111,6 +111,8 @@ public static Kernel GetKernel(string chatEndpoint, activity?.SetTag("plugins.code_analyze_function", "loaded"); } + kernelBuilderAction?.Invoke(kernelBuilder); + var kernel = kernelBuilder.Build(); kernel.FunctionInvocationFilters.Add(new FunctionResultInterceptor()); diff --git a/src/KoalaWiki/KoalaWarehouse/DocumentPending/DocsFunction.cs b/src/KoalaWiki/KoalaWarehouse/DocumentPending/DocsFunction.cs new file mode 100644 index 00000000..63c8d604 --- /dev/null +++ b/src/KoalaWiki/KoalaWarehouse/DocumentPending/DocsFunction.cs @@ -0,0 +1,123 @@ +using System.ComponentModel; + +namespace KoalaWiki.KoalaWarehouse.DocumentPending; + +public class DocsFunction +{ + /// + /// 写入内容 + /// + /// + [KernelFunction("Write"), Description(""" + Generate the content for the document. + Usage: + - This tool will overwrite the existing content. 
+ - Always edit the existing content first. Do not overwrite it unless explicitly required. + - Use emojis only when the user explicitly requests it. Avoid adding emojis to the document unless specifically asked to do so. + """)] + public string Write( + [Description("The content to write")] string content) + { + Content = content; + if (string.IsNullOrEmpty(Content)) + { + return "Content cannot be empty."; + } + + Content = Content.Trim(); + return @$"Write successful"; + } + + [KernelFunction("Edit"), Description(""" + Perform precise string replacement operations in the generated document. + Usage: + - Before making any edits, you must use the `Read` tool at least once in the conversation. If you attempt to edit without reading the file, the tool will report an error. + - When editing the text output from the `Read` tool, make sure to retain its exact indentation (tabs/spaces), that is, the form that appears after the line number prefix. The line number prefix format is: space + line number + tab. Everything after that tab is the actual file content and must match it. Do not include any components of the line number prefix in the old string or new string. + - Always prioritize editing existing files in the code repository. Do not overwrite the content unless explicitly required. + - Use emojis only when the user explicitly requests it. Do not add emojis to the file unless required. + - If the `oldString` is not unique in the file, the edit will fail. Either provide a longer string with more context to make it unique, or use `replaceAll` to change all instances of the "old string". + - Use `replaceAll` to replace and rename strings throughout the file. This parameter is very useful when renaming variables, etc. 
+ """)] + public string Edit( + [Description("The text to replace")] + string oldString, + [Description("The text to replace it with (must be different from old_string)")] + string newString, + [Description("Replace all occurences of old_string (default false)")] + bool replaceAll = false) + { + if (string.IsNullOrEmpty(Content)) + { + return "Document content is empty, please write content first."; + } + + if (string.IsNullOrEmpty(oldString)) + { + return "Old string cannot be empty."; + } + + if (oldString == newString) + { + return "New string must be different from old string."; + } + + if (!Content.Contains(oldString)) + { + return "Old string not found in document."; + } + + if (!replaceAll && Content.Split(new[] { oldString }, StringSplitOptions.None).Length > 2) + { + return "Old string is not unique in the document. Use replaceAll=true to replace all occurrences or provide a longer string with more context."; + } + + if (replaceAll) + { + Content = Content.Replace(oldString, newString); + } + else + { + int index = Content.IndexOf(oldString); + Content = Content.Substring(0, index) + newString + Content.Substring(index + oldString.Length); + } + + return @$"Edit successful"; + } + + [KernelFunction("Read"), Description(""" + To read the current generated document content, please note that this method can only read the content of the generated document. + Usage: + - By default, it reads up to 2000 lines from the beginning of the file. + - You can choose to specify the line offset and limit, but it is recommended not to provide these parameters to read the entire file. + - Any lines exceeding 2000 characters will be truncated. + - If the file you are reading exists but is empty, you will receive a system warning instead of the file content. + """)] + public string Read( + [Description("The line number to start reading from. Only provide if the file is too large to read at once")] + int offset, + [Description("The number of lines to read. 
Only provide if the file is too large to read at once.")] + int limit = 2000) + { + var lines = Content.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); + + if (offset < 0 || offset >= lines.Length) + { + // 读取所有 + return string.Join("\n", lines); + } + + if (limit <= 0 || offset + limit > lines.Length) + { + // 读取到结尾 + return string.Join("\n", lines.Skip(offset)); + } + + // 读取指定范围 + return string.Join("\n", lines.Skip(offset).Take(limit)); + } + + /// + /// 内容 + /// + public string? Content { get; private set; } +} \ No newline at end of file diff --git a/src/KoalaWiki/KoalaWarehouse/DocumentPending/DocumentPendingService.cs b/src/KoalaWiki/KoalaWarehouse/DocumentPending/DocumentPendingService.cs index 9bf4e9d6..d02b903f 100644 --- a/src/KoalaWiki/KoalaWarehouse/DocumentPending/DocumentPendingService.cs +++ b/src/KoalaWiki/KoalaWarehouse/DocumentPending/DocumentPendingService.cs @@ -236,6 +236,8 @@ private static async Task ProcessCatalogueItems(DocumentCatalo string gitRepository, string branch, string path, ClassifyType? classify, List files) { DocumentContext.DocumentStore = new DocumentStore(); + + var docs = new DocsFunction(); // 为每个文档处理创建独立的Kernel实例,避免状态管理冲突 var documentKernel = KernelFactory.GetKernel( OpenAIOptions.Endpoint, @@ -243,7 +245,7 @@ private static async Task ProcessCatalogueItems(DocumentCatalo path, OpenAIOptions.ChatModel, false, // 文档生成不需要代码分析功能 - files + files, (builder => { builder.Plugins.AddFromObject(docs); }) ); var chat = documentKernel.Services.GetService(); @@ -288,7 +290,7 @@ private static async Task ProcessCatalogueItems(DocumentCatalo // 保存原始内容,防止精炼失败时丢失 var originalContent = sr.ToString(); - if (string.IsNullOrEmpty(originalContent) && count < 3) + if (string.IsNullOrEmpty(docs.Content) && count < 3) { count++; goto reset; @@ -371,6 +373,8 @@ 7. 
ENSURE all enhancements are based on the code files analyzed in the original history.AddUserMessage(refineContents); var refinedContent = new StringBuilder(); + int reset1 = 1; + reset1: await foreach (var item in chat.GetStreamingChatMessageContentsAsync(history, settings, documentKernel)) { if (!string.IsNullOrEmpty(item.Content)) @@ -379,6 +383,12 @@ 7. ENSURE all enhancements are based on the code files analyzed in the original } } + if (string.IsNullOrEmpty(docs.Content) && reset1 < 3) + { + reset1++; + goto reset1; + } + // 检查精炼后的内容是否有效 if (!string.IsNullOrWhiteSpace(refinedContent.ToString())) { @@ -398,43 +408,10 @@ 7. ENSURE all enhancements are based on the code files analyzed in the original } } - // 删除内容中所有的内的内容,可能存在多个标签, - var thinkingRegex = new Regex(@".*?", RegexOptions.Singleline); - sr = new StringBuilder(thinkingRegex.Replace(sr.ToString(), string.Empty)); - - - // 使用正则表达式将中的内容提取 - var regex = new Regex(@"(.*?)", RegexOptions.Singleline); - - var match = regex.Match(sr.ToString()); - - if (match.Success) - { - // 提取到的内容 - var extractedContent = match.Groups[1].Value; - sr.Clear(); - sr.Append(extractedContent); - } - - var content = sr.ToString().Trim(); - - // 删除所有的所有的 - var thinkRegex = new Regex(@"(.*?)", RegexOptions.Singleline); - content = thinkRegex.Replace(content, string.Empty); - - // 从docs提取 - var docsRegex = new Regex(@"(.*?)", RegexOptions.Singleline); - var docsMatch = docsRegex.Match(content); - if (docsMatch.Success) - { - // 提取到的内容 - var extractedDocs = docsMatch.Groups[1].Value; - content = content.Replace(docsMatch.Value, extractedDocs); - } var fileItem = new DocumentFileItem() { - Content = content, + Content = docs.Content, DocumentCatalogId = catalog.Id, Description = string.Empty, Extra = new Dictionary(), diff --git a/src/KoalaWiki/KoalaWarehouse/GenerateThinkCatalogue/GenerateThinkCatalogueService.Prompt.cs b/src/KoalaWiki/KoalaWarehouse/GenerateThinkCatalogue/GenerateThinkCatalogueService.Prompt.cs index 
8c61b06e..6ef434de 100644 --- a/src/KoalaWiki/KoalaWarehouse/GenerateThinkCatalogue/GenerateThinkCatalogueService.Prompt.cs +++ b/src/KoalaWiki/KoalaWarehouse/GenerateThinkCatalogue/GenerateThinkCatalogueService.Prompt.cs @@ -21,13 +21,13 @@ public static async Task GenerateThinkCataloguePromptAsync(ClassifyType? var enhancementLevel = Math.Min(attemptNumber, 3); var enhancement = enhancementLevel switch { - 0 => "\n\nPlease provide a comprehensive analysis in JSON format within tags.", + 0 => "\n\nGenerate documentation catalog in the specified hierarchical JSON format within tags.", 1 => - "\n\nIMPORTANT: You must respond with valid JSON wrapped in tags. Ensure the JSON is properly formatted and complete.", + "\n\nIMPORTANT: You must respond with valid JSON using the items/children structure within tags. Follow the exact format specified.", 2 => - "\n\nCRITICAL: Previous attempts failed. Please provide ONLY valid JSON within tags. Double-check JSON syntax before responding.", + "\n\nCRITICAL: Previous attempts failed. Generate ONLY valid JSON within tags. Use the hierarchical items structure with title, name, and children fields.", _ => - "\n\nFINAL ATTEMPT: Respond with MINIMAL but VALID JSON in tags. Focus on basic structure: {\"categories\":[{\"name\":\"...\",\"description\":\"...\"}],\"architecture_overview\":\"...\"}. Ensure valid JSON syntax." + "\n\nFINAL ATTEMPT: Generate minimal but valid JSON structure. Must include: {\"items\":[{\"title\":\"getting-started\",\"name\":\"入门指南\",\"children\":[...]},{\"title\":\"deep-dive\",\"name\":\"深入解剖\",\"children\":[...]}]}." }; return basePrompt + enhancement; @@ -40,13 +40,10 @@ private static string GetProjectTypeDescription(ClassifyType? classifyType) { return """ ## Application System - You are generating a documentation catalogue for an enterprise application system. 
Focus on: - - **Business Domain**: Core business logic, domain models, and value propositions - - **Architecture**: System design patterns, data flow, and component interactions - - **User Experience**: Interface design, user journeys, and accessibility considerations - - **Deployment**: Production deployment strategies, scaling considerations, and operational monitoring - - **Integration**: External service dependencies, API contracts, and data exchange patterns - Structure the documentation to cover all key aspects of the application system. + Focus on application-specific documentation needs: + - **Getting Started**: Business purpose, quick setup, basic usage patterns + - **Deep Dive**: System architecture, core features, technical implementation, integration points + Emphasize user workflows, business logic, and deployment considerations. """; } @@ -54,13 +51,10 @@ Structure the documentation to cover all key aspects of the application system. { return """ ## Development Framework - You are generating a documentation catalogue for a development framework. Focus on: - - **Core Architecture**: Framework design patterns, plugin systems, and extensibility mechanisms - - **Developer Experience**: API consistency, documentation quality, and learning curve - - **Ecosystem**: Compatible tools, community contributions, and third-party integrations - - **Performance**: Runtime efficiency, memory usage, and optimization strategies - - **Standards**: Code conventions, best practices, and architectural guidelines - Structure the documentation to showcase framework capabilities and usage patterns. + Focus on framework-specific documentation needs: + - **Getting Started**: Framework purpose, quick setup, basic concepts, simple examples + - **Deep Dive**: Architecture patterns, extensibility mechanisms, API design, performance optimization + Emphasize developer experience, plugin systems, and integration workflows. 
"""; } @@ -68,13 +62,10 @@ Structure the documentation to showcase framework capabilities and usage pattern { return """ ## Reusable Code Library - You are generating a documentation catalogue for a reusable code library. Focus on: - - **API Design**: Interface consistency, method signatures, and parameter patterns - - **Integration Patterns**: Installation methods, dependency management, and compatibility - - **Usage Examples**: Common use cases, code samples, and implementation patterns - - **Performance**: Efficiency benchmarks, resource usage, and optimization techniques - - **Maintenance**: Version compatibility, breaking changes, and migration guides - Structure the documentation to facilitate library adoption and integration. + Focus on library-specific documentation needs: + - **Getting Started**: Library purpose, installation, basic usage, common examples + - **Deep Dive**: API design, advanced features, performance characteristics, customization options + Emphasize practical usage patterns, integration strategies, and version compatibility. """; } @@ -82,13 +73,10 @@ Structure the documentation to facilitate library adoption and integration. { return """ ## Development Tool - You are generating a documentation catalogue for a development tool. Focus on: - - **Productivity Features**: Core capabilities, automation features, and workflow optimization - - **Configuration**: Setup procedures, customization options, and environment integration - - **Integration**: IDE support, build system compatibility, and toolchain integration - - **User Interface**: Command syntax, GUI elements, and user interaction patterns - - **Performance**: Execution speed, resource consumption, and scalability limits - Structure the documentation to guide users through setup and effective usage. 
+ Focus on tool-specific documentation needs: + - **Getting Started**: Tool purpose, installation, basic configuration, first workflow + - **Deep Dive**: Advanced features, customization options, integration patterns, optimization techniques + Emphasize practical workflows, automation capabilities, and IDE integration. """; } @@ -96,13 +84,10 @@ Structure the documentation to guide users through setup and effective usage. { return """ ## Command-Line Interface Tool - You are generating a documentation catalogue for a CLI tool. Focus on: - - **Command Structure**: Command hierarchy, argument patterns, and option consistency - - **Usability**: Help systems, error messages, and user guidance features - - **Automation**: Scripting capabilities, batch operations, and pipeline integration - - **Configuration**: Config files, environment variables, and persistent settings - - **Performance**: Execution efficiency, startup time, and resource optimization - Structure the documentation to enable efficient command-line usage and automation. + Focus on CLI-specific documentation needs: + - **Getting Started**: Tool purpose, installation, basic commands, common workflows + - **Deep Dive**: Command reference, advanced usage, scripting integration, configuration options + Emphasize command syntax, automation capabilities, and pipeline integration. """; } @@ -110,13 +95,10 @@ Structure the documentation to enable efficient command-line usage and automatio { return """ ## DevOps & Infrastructure Configuration - You are generating a documentation catalogue for a DevOps configuration project. 
Focus on: - - **Infrastructure Patterns**: Deployment architectures, scaling strategies, and resource management - - **Automation**: CI/CD pipelines, deployment scripts, and infrastructure as code - - **Monitoring**: Logging strategies, metrics collection, and alerting configurations - - **Security**: Access controls, secret management, and compliance requirements - - **Operations**: Maintenance procedures, backup strategies, and disaster recovery - Structure the documentation to support operational excellence and reliable deployments. + Focus on DevOps-specific documentation needs: + - **Getting Started**: Infrastructure purpose, basic setup, deployment workflow, monitoring basics + - **Deep Dive**: Advanced automation, security configuration, scaling strategies, operational procedures + Emphasize deployment patterns, infrastructure as code, and operational excellence. """; } @@ -124,25 +106,19 @@ Structure the documentation to support operational excellence and reliable deplo { return """ ## Documentation & Testing Project - You are generating a documentation catalogue for a documentation or testing project. Focus on: - - **Content Structure**: Information architecture, navigation patterns, and content organization - - **Quality Assurance**: Testing methodologies, coverage strategies, and validation processes - - **Maintenance**: Content lifecycle, update procedures, and version management - - **Accessibility**: Documentation formats, search capabilities, and user experience - - **Standards**: Style guides, contribution guidelines, and quality metrics - Structure the documentation to ensure comprehensive coverage and usability. + Focus on documentation-specific needs: + - **Getting Started**: Project purpose, content overview, contribution basics, style guidelines + - **Deep Dive**: Content architecture, testing methodologies, maintenance procedures, quality standards + Emphasize content organization, quality assurance, and contributor workflows. 
"""; } return """ ## General Project Analysis - You are generating a documentation catalogue for a general software project. Focus on: - - **Architecture**: System design, component relationships, and technical decisions - - **Implementation**: Code quality, design patterns, and development practices - - **Features**: Core functionality, user capabilities, and system behaviors - - **Setup**: Installation procedures, configuration requirements, and environment setup - - **Usage**: Common workflows, integration patterns, and practical applications - Structure the documentation to provide comprehensive coverage of all project aspects. + Focus on general project documentation needs: + - **Getting Started**: Project purpose, setup instructions, basic concepts, common usage + - **Deep Dive**: System architecture, core features, technical implementation, advanced customization + Provide comprehensive coverage balancing accessibility with technical depth. """; } } \ No newline at end of file diff --git a/src/KoalaWiki/KoalaWarehouse/GenerateThinkCatalogue/GenerateThinkCatalogueService.cs b/src/KoalaWiki/KoalaWarehouse/GenerateThinkCatalogue/GenerateThinkCatalogueService.cs index aaf61980..6b8e35f5 100644 --- a/src/KoalaWiki/KoalaWarehouse/GenerateThinkCatalogue/GenerateThinkCatalogueService.cs +++ b/src/KoalaWiki/KoalaWarehouse/GenerateThinkCatalogue/GenerateThinkCatalogueService.cs @@ -149,6 +149,9 @@ Ensure all components are properly categorized. MaxTokens = DocumentsHelper.GetMaxTokens(OpenAIOptions.AnalysisModel) }; + int retry = 1; + retry: + // 流式获取响应 await foreach (var item in chat.GetStreamingChatMessageContentsAsync(history, settings, analysisModel)) { @@ -158,9 +161,21 @@ Ensure all components are properly categorized. 
} } + // str先清空标签 + var thinkTagRegex = new Regex(@".*?", RegexOptions.Singleline | RegexOptions.IgnoreCase); + var thinkContent = thinkTagRegex.Match(str.ToString()); + str = new StringBuilder(thinkTagRegex.Replace(str.ToString(), string.Empty).Trim()); + + if (str.Length == 0) { - throw new InvalidOperationException("AI 返回了空响应"); + history.AddAssistantMessage(thinkContent.Value); + retry++; + if (retry > 3) + { + throw new Exception("AI生成目录的时候重复多次响应空内容"); + } + goto retry; } // 质量增强逻辑 diff --git a/src/KoalaWiki/KoalaWarehouse/MiniMapService.cs b/src/KoalaWiki/KoalaWarehouse/MiniMapService.cs index 87928922..d97aed65 100644 --- a/src/KoalaWiki/KoalaWarehouse/MiniMapService.cs +++ b/src/KoalaWiki/KoalaWarehouse/MiniMapService.cs @@ -40,6 +40,9 @@ public static async Task GenerateMiniMap(string catalogue, var kernel = KernelFactory.GetKernel(OpenAIOptions.Endpoint, OpenAIOptions.ChatApiKey, path, OpenAIOptions.ChatModel, false); + int retry = 1; + retry: + await foreach (var item in kernel.GetRequiredService() .GetStreamingChatMessageContentsAsync(history, new OpenAIPromptExecutionSettings() { @@ -57,6 +60,18 @@ public static async Task GenerateMiniMap(string catalogue, var thinkingPattern = new Regex(@".*?", RegexOptions.Singleline); miniMap = new StringBuilder(thinkingPattern.Replace(miniMap.ToString(), string.Empty).Trim()); + + // 如果内容是空的则再次执行 + if (miniMap.Length == 0) + { + retry++; + if (retry > 3) + { + throw new Exception("知识图谱生成失败,请检查仓库是否存在代码文件或仓库地址是否正确。"); + } + + goto retry; + } // 开始解析知识图谱 var miniMapContent = miniMap.ToString(); diff --git a/src/KoalaWiki/KoalaWarehouse/Pipeline/IDocumentProcessingStep.cs b/src/KoalaWiki/KoalaWarehouse/Pipeline/IDocumentProcessingStep.cs index 3e0da452..da85b823 100644 --- a/src/KoalaWiki/KoalaWarehouse/Pipeline/IDocumentProcessingStep.cs +++ b/src/KoalaWiki/KoalaWarehouse/Pipeline/IDocumentProcessingStep.cs @@ -56,7 +56,8 @@ public abstract class DocumentProcessingStepBase(ILogger logger ContinueOnFailure = true }; - 
public abstract Task ExecuteAsync(TInput input, CancellationToken cancellationToken = default); + public abstract Task ExecuteAsync(TInput input, + CancellationToken cancellationToken = default); public virtual Task CanExecuteAsync(TInput input) => Task.FromResult(true); diff --git a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/CatalogueGenerationStep.cs b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/CatalogueGenerationStep.cs index e9c6fcd5..9277d1ba 100644 --- a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/CatalogueGenerationStep.cs +++ b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/CatalogueGenerationStep.cs @@ -63,7 +63,7 @@ public override async Task ExecuteAsync( throw; } - return context; + return await Task.FromResult(context); } public override async Task HandleErrorAsync( diff --git a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/DocumentContentGenerationStep.cs b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/DocumentContentGenerationStep.cs index a406edfd..3b0cfc1c 100644 --- a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/DocumentContentGenerationStep.cs +++ b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/DocumentContentGenerationStep.cs @@ -10,8 +10,7 @@ public DocumentContentGenerationStep(ILogger logg public override string StepName => "生成目录结构中的文档"; - public override async Task ExecuteAsync( - DocumentProcessingContext context, + public override async Task ExecuteAsync(DocumentProcessingContext context, CancellationToken cancellationToken = default) { using var activity = ActivitySource.StartActivity(StepName); diff --git a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/DocumentStructureGenerationStep.cs b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/DocumentStructureGenerationStep.cs index cefcc3bb..5ee248c5 100644 --- a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/DocumentStructureGenerationStep.cs +++ b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/DocumentStructureGenerationStep.cs @@ -10,8 +10,7 @@ public sealed class DocumentStructureGenerationStep(ILogger 
"生成目录结构"; - public override async Task ExecuteAsync( - DocumentProcessingContext context, + public override async Task ExecuteAsync(DocumentProcessingContext context, CancellationToken cancellationToken = default) { using var activity = ActivitySource.StartActivity(StepName); diff --git a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/KnowledgeGraphGenerationStep.cs b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/KnowledgeGraphGenerationStep.cs index 2ce1ba8f..8873ea87 100644 --- a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/KnowledgeGraphGenerationStep.cs +++ b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/KnowledgeGraphGenerationStep.cs @@ -10,8 +10,7 @@ public KnowledgeGraphGenerationStep(ILogger logger public override string StepName => "生成知识图谱"; - public override async Task ExecuteAsync( - DocumentProcessingContext context, + public override async Task ExecuteAsync(DocumentProcessingContext context, CancellationToken cancellationToken = default) { using var activity = ActivitySource.StartActivity(StepName); diff --git a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/ProjectClassificationStep.cs b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/ProjectClassificationStep.cs index 86462ddf..077da2fe 100644 --- a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/ProjectClassificationStep.cs +++ b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/ProjectClassificationStep.cs @@ -8,8 +8,7 @@ public ProjectClassificationStep(ILogger logger) : ba public override string StepName => "读取或生成项目类别"; - public override async Task ExecuteAsync( - DocumentProcessingContext context, + public override async Task ExecuteAsync(DocumentProcessingContext context, CancellationToken cancellationToken = default) { using var activity = ActivitySource.StartActivity(StepName); diff --git a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/ReadmeGenerationStep.cs b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/ReadmeGenerationStep.cs index 338baa4e..cd21ebda 100644 --- 
a/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/ReadmeGenerationStep.cs +++ b/src/KoalaWiki/KoalaWarehouse/Pipeline/Steps/ReadmeGenerationStep.cs @@ -35,8 +35,7 @@ public ReadmeGenerationStep(ILogger logger) : base(logger) } }; - public override async Task ExecuteAsync( - DocumentProcessingContext context, + public override async Task ExecuteAsync(DocumentProcessingContext context, CancellationToken cancellationToken = default) { using var activity = ActivitySource.StartActivity(StepName); diff --git a/src/KoalaWiki/Options/DocumentOptions.cs b/src/KoalaWiki/Options/DocumentOptions.cs index 4562aa9a..a51e3fcf 100644 --- a/src/KoalaWiki/Options/DocumentOptions.cs +++ b/src/KoalaWiki/Options/DocumentOptions.cs @@ -85,6 +85,14 @@ public class DocumentOptions /// public static int MaxFileReadCount { get; set; } = 5; + /// + /// Git代理设置 + /// 支持HTTP/HTTPS代理,格式:http://proxy-server:port 或 https://proxy-server:port + /// 可通过环境变量GIT_PROXY进行配置 + /// + /// + public static string? Proxy { get; set; } + public static void InitConfig(IConfiguration configuration) { configuration.GetSection(Name).Get(); @@ -167,5 +175,11 @@ public static void InitConfig(IConfiguration configuration) MaxFileReadCount = count; } } + + var proxy = configuration.GetValue($"GIT_PROXY"); + if (!string.IsNullOrEmpty(proxy)) + { + Proxy = proxy; + } } } \ No newline at end of file diff --git a/src/KoalaWiki/Prompts/Warehouse/AnalyzeCatalogue.md b/src/KoalaWiki/Prompts/Warehouse/AnalyzeCatalogue.md index 36181552..e3f06f4a 100644 --- a/src/KoalaWiki/Prompts/Warehouse/AnalyzeCatalogue.md +++ b/src/KoalaWiki/Prompts/Warehouse/AnalyzeCatalogue.md @@ -1,10 +1,12 @@ -# Engineering Blog Content Catalog Generator +# Project Documentation Catalog Generator -You are a senior software engineer who creates compelling content structures for technical blog series about software projects. 
Analyze repositories like an experienced developer exploring a new codebase, then generate content catalogs that tell the engineering story through a series of interconnected blog posts - from initial discovery to deep technical insights. +You are a technical documentation architect who analyzes code repositories and generates structured documentation catalogs. Create a two-module documentation architecture based on actual project analysis. ## Core Mission -Transform repository code analysis into engaging blog content series that tell the complete engineering story. Create content architectures that guide readers through a developer's journey of understanding a project - from initial curiosity and first impressions to deep architectural insights and implementation wisdom that fellow engineers would find valuable and inspiring. +Transform repository code analysis into hierarchical documentation catalogs with two modules: +1. **Getting Started Guide** - Project overview and quick setup for new users +2. **Deep Dive Analysis** - Technical architecture and implementation details for advanced users ## Input Analysis @@ -19,618 +21,231 @@ Transform repository code analysis into engaging blog content series that tell t **Target Language:** {{$language}} -**IMPORTANT: ALL generated content, titles, descriptions, and prompts must be written in {{$language}}.** - -## Engineering Blog Series Architecture Principles - -### 1. Developer Learning Journey Architecture - -- **Discovery to Mastery Flow**: Natural progression from "What is this?" to "How can I build something like this?" -- **Engineering Perspective Layers**: Surface-level overview to deep implementation analysis -- **Technical Storytelling**: Each piece builds on previous insights and sets up future revelations -- **Complexity Graduation**: Structure matches how experienced developers actually explore new codebases - -### 2.
Engineering Content Discoverability - -- **Problem-Solution Grouping**: Content organized around engineering challenges and solutions -- **Technical Interest Tagging**: Metadata that helps developers find content relevant to their interests -- **Knowledge Building Paths**: Clear progression from basic understanding to implementation expertise -- **Multiple Technical Perspectives**: Different entry points for different engineering backgrounds - -### 3. Engineering-Focused Content Structure - -- **Technical Complexity Scaling**: Content depth matches the sophistication of the implementation -- **Domain-Specific Engineering Patterns**: Tailored content for different types of software projects -- **Developer Workflow Alignment**: Structure matches how engineers actually learn and work with new technologies -- **Technical Insight Calibration**: Balance accessibility with engineering depth - -## Repository Analysis Protocol - -### Step 1: Deep Codebase Analysis - -Systematically analyze ALL provided code files to understand: - -**Core Project Philosophy Mining:** - -1. **Fundamental Problem Definition** - Core technical or business challenges the project solves, why this solution is needed -2. **Design Philosophy Identification** - Core design principles inferred from code organization, naming conventions, API design -3. **Technical Philosophy Embodiment** - Priority choices and trade-offs in simplicity, performance, scalability, usability -4. **Innovation Breakthrough Points** - Unique innovations or improvements in technical implementation, user experience, development efficiency -5. **Value Proposition Analysis** - Unique advantages and differentiating features compared to existing solutions - -**Technical Architecture:** - -1. **Project Classification** - Web app, API, CLI tool, library, framework, or platform -2. **Architectural Patterns** - Microservices, monolith, serverless, component-based -3. 
**Technology Ecosystem** - Primary stack, dependencies, integration points -4. **Code Organization** - Module structure, separation of concerns, design patterns -5. **Entry Points & Flows** - Application lifecycle, critical user journeys -6. **Configuration Complexity** - Setup requirements, environment management - -**Architectural Decision Deep Analysis:** - -1. **Core Trade-off Decisions** - Key technical choices like performance vs maintainability, simplicity vs feature completeness and their rationale -2. **Technology Stack Selection Logic** - Why specific tech frameworks, languages, databases were chosen, underlying consideration factors -3. **Modular Design Philosophy** - Logic behind component decomposition, boundary division principles, dependency relationship design considerations -4. **Extensibility Design Considerations** - How future needs are accommodated, plugin mechanisms, configuration system design philosophy -5. **Technical Debt Management** - Technical debt handling strategies in the project, refactoring and optimization priority considerations - -**Core Functionality Analysis:** - -1. **Feature Inventory & Classification** - Systematic cataloging of all user-facing features and capabilities -2. **Critical Path Analysis** - Identifying and documenting core user workflows and business processes -3. **Feature Dependency Mapping** - Understanding how features build upon and interact with each other -4. **Business Logic Documentation** - Extracting and explaining core domain logic and decision-making processes -5. **User Value Propositions** - Documenting what problems each feature solves and why it exists -6. **Feature Lifecycle States** - Understanding feature maturity, updates, and potential deprecation paths - -**Project Essence Deep Mining:** - -1. **Core Competitive Advantage Identification** - Project's unique technical advantages, innovation points and core competitiveness -2. 
**Problem-Solving Approach Analysis** - Unique methodologies and approaches the project uses to solve problems -3. **User Value Creation Mechanism** - How the project creates value for users, what pain points it solves -4. **Technology Ecosystem Positioning** - Project's position and role in the overall technology ecosystem -5. **Design Philosophy Consistency** - Unified design philosophy and principles reflected throughout the project -6. **Sustainable Development Strategy** - Project's long-term development planning and technical evolution path - -**Technical Implementation Deep-Dive:** - -1. **Algorithm & Data Structure Analysis** - Document computational complexity, optimization strategies, and design - choices -2. **Performance & Scalability Documentation** - Analyze bottlenecks, optimization patterns, and scalability - considerations -3. **Security Architecture Analysis** - Document authentication, authorization, data protection, and security patterns -4. **Error Handling & Resilience** - Analyze fault tolerance, recovery mechanisms, and error propagation patterns -5. **Component Interaction Patterns** - Document how system components communicate, coordinate, and collaborate -6. **Data Flow Analysis** - Understanding data transformation pipelines, processing workflows, and storage patterns -7. **Integration & API Documentation** - Analyze external dependencies, service contracts, and integration patterns -8. **Configuration & Environment Management** - Document complex setup requirements, environment-specific behaviors - -**User Interaction Patterns:** - -1. **Primary Use Cases** - Core functionality users interact with -2. **Integration Scenarios** - How this project fits into larger systems -3. **Developer Workflows** - Contributing, extending, customizing patterns -4. 
**Operational Requirements** - Deployment, monitoring, maintenance needs - -### Step 2: Engineering Audience & Interest Analysis - -Identify distinct developer types and their content interests: - -**Curious Developers (Tech Explorers)** - -- Goal: Understand what makes this project interesting from an engineering perspective -- Interests: Architecture overview, novel approaches, problem domain insights -- Journey: "What is this?" → "How does it work?" → "What makes it clever?" → "Could I use this approach?" - -**Implementation-Focused Engineers (Builders)** - -- Goal: Learn practical patterns and techniques they can apply to their own work -- Interests: Design patterns, implementation strategies, real-world usage examples -- Journey: Problem Context → Solution Approach → Implementation Details → Practical Application - -**Architecture-Minded Engineers (System Designers)** - -- Goal: Understand system design decisions and architectural trade-offs -- Interests: Scalability patterns, performance considerations, architectural innovations -- Journey: System Overview → Design Decisions → Trade-off Analysis → Architectural Lessons - -**Contributing Engineers (Code Contributors)** - -- Goal: Understand the codebase deeply enough to contribute or extend it -- Interests: Code organization, development practices, contribution workflows -- Journey: Codebase Tour → Development Environment → Contribution Process → Advanced Customization - -### Step 3: Intelligent Content Organization - -Structure documentation using enhanced Diátaxis framework with hierarchical organization: - -### Step 4: Domain-Specific Analysis Framework - -Apply specialized analysis patterns based on project type: - -**Web Applications & User Interfaces:** - -- User interaction flows and state management patterns -- Rendering strategies and performance optimization -- Accessibility implementation and user experience patterns -- Client-server communication and data synchronization - -**APIs & 
Microservices:** - -- Endpoint documentation with request/response schemas -- Service boundary analysis and inter-service communication -- Data contract specifications and versioning strategies -- Authentication, rate limiting, and security implementation - -**Data Processing & Analytics Systems:** - -- Data pipeline architecture and transformation logic -- Storage patterns, indexing strategies, and query optimization -- Batch vs. real-time processing implementation -- Data quality, validation, and monitoring mechanisms - -**Developer Tools & Frameworks:** - -- Extension mechanisms and plugin architecture -- Configuration systems and customization options -- API design patterns and developer experience -- Integration workflows and toolchain compatibility - -**Infrastructure & DevOps Tools:** - -- Deployment strategies and environment management -- Monitoring, logging, and observability implementation -- Resource management and optimization patterns -- Security compliance and operational procedures - -## Engineering Blog Content Framework - -### Exploration Posts (Discovery-Oriented) - -**Purpose**: Help developers discover and understand what makes this project interesting - -**Content Types:** - -1. **First Impressions** - Initial exploration and "What caught my eye" -2. **Core Innovation** - What makes this project technically noteworthy -3. **Problem-Solution Fit** - Understanding the engineering challenge being solved -4. **Quick Win Examples** - Getting developers excited with immediate value - -**Writing Approach:** - -- Share the journey of discovery like exploring a new codebase -- Focus on "aha moments" and interesting engineering insights -- Balance accessibility with technical depth -- Generate curiosity and enthusiasm for deeper exploration - -### Implementation Deep-Dives (Solution-Oriented) - -**Purpose**: Show how engineering problems are solved in practice - -**Content Types:** - -1. 
**Setup & Configuration** - Getting the development environment working -2. **Core Implementation Patterns** - Key approaches and design decisions -3. **Integration Strategies** - How this fits into larger systems -4. **Performance & Optimization** - Making things work well in production - -**Writing Approach:** - -- Start with the engineering problem and constraints -- Walk through the solution like a code review -- Explain the reasoning behind implementation choices -- Include practical gotchas and lessons learned - -### Technical Reference Posts (Specification-Oriented) - -**Purpose**: Comprehensive technical specifications explained from an engineer's perspective - -**Content Types:** - -1. **API & Interface Guide** - Complete technical specifications with practical context -2. **Configuration Deep-Dive** - All settings explained with real-world usage scenarios -3. **Data Models & Schemas** - Data structure analysis with design rationale -4. **Performance Characteristics** - Benchmarks, limitations, and optimization opportunities - -**Writing Approach:** - -- Present specifications with engineering context and rationale -- Include performance implications and trade-offs -- Provide practical usage guidance beyond basic specifications -- Connect technical details to broader architectural decisions - -### Architectural Insights (Understanding-Oriented) - -**Purpose**: Deep engineering insights that reveal the thinking behind the system - -**Content Types:** - -1. **System Architecture** - High-level design philosophy and key decisions -2. **Design Trade-offs** - Engineering decisions and their implications -3. **Technology Landscape** - How this fits into the broader engineering ecosystem -4. 
**Advanced Engineering Concepts** - Sophisticated technical insights and innovations - -**Writing Approach:** - -- Explain the "why" behind architectural decisions -- Compare with alternative approaches and explain trade-offs -- Connect to broader engineering principles and industry patterns -- Share insights that help developers think like system architects - -## Engineering Content Complexity Assessment - -### Simple Projects (≤10 core files, single domain) - -**Blog Series Structure**: 6-10 interconnected posts - -- **Content Mix**: 30% Exploration, 35% Implementation, 20% Reference, 15% Architecture -- **Series Depth**: 2 levels maximum (overview + details) -- **Focus**: Clear engineering story with practical insights - -### Medium Projects (11-50 files, multi-component) - -**Blog Series Structure**: 10-16 comprehensive posts - -- **Content Mix**: 25% Exploration, 35% Implementation, 25% Reference, 15% Architecture -- **Series Depth**: 3 levels with interconnected narratives -- **Focus**: Complete engineering journey from discovery to implementation - -### Complex Projects (>50 files, multi-domain/platform) - -**Blog Series Structure**: 16-25 detailed posts - -- **Content Mix**: 20% Exploration, 30% Implementation, 35% Reference, 15% Architecture -- **Series Depth**: 4 levels with multiple story arcs -- **Focus**: Comprehensive engineering analysis with multiple technical perspectives - -## Content Templates with Enhanced Structure - -### Exploration Blog Post Template (Discovery-Oriented) - -``` -Write an engaging blog post about [specific aspect] that captures the excitement of discovering something interesting in this codebase. 
- -**Engineering Hook:** -- What initially caught your attention about this aspect -- Why this is interesting from an engineering perspective -- What problem or challenge this addresses - -**Discovery Journey:** -- Your initial assumptions or expectations -- What you found when you started digging deeper -- Surprising or clever aspects of the implementation - -**Technical Exploration:** -1. **First Look** - Surface-level observations and initial impressions -2. **Deeper Investigation** - What the code reveals about the engineering approach -3. **Key Insights** - The "aha moments" and interesting discoveries -4. **Broader Implications** - How this connects to larger engineering principles - -**Developer Takeaways:** -- What other engineers can learn from this approach -- Practical applications or patterns they could use -- Questions this raises for further exploration - -**Writing Style:** -- Share your genuine curiosity and discovery process -- Include specific code examples that illustrate key points -- Balance technical detail with accessible explanations -- Connect to broader engineering concepts and practices -``` - -### Implementation Deep-Dive Template (Solution-Oriented) - -``` -Write a technical blog post analyzing how [specific engineering challenge] is solved in this codebase, like a senior developer explaining an interesting solution to colleagues. - -**Engineering Problem Setup:** -- The specific technical challenge being addressed -- Why this problem is interesting or non-trivial -- Constraints and requirements that shaped the solution - -**Solution Analysis:** -- How the developers approached this problem -- Key design decisions and architectural choices -- Alternative approaches and why they weren't chosen - -**Implementation Walkthrough:** -1. **Problem Assessment** - Understanding the technical constraints -2. **Design Approach** - The chosen architectural strategy -3. **Core Implementation** - Key code patterns and techniques -4. 
**Integration Points** - How this connects to the broader system -5. **Real-world Considerations** - Performance, error handling, edge cases - -**Engineering Insights:** -- Clever solutions or optimizations worth highlighting -- Trade-offs made and their implications -- Lessons other developers can apply to similar problems - -**Writing Approach:** -- Lead with the engineering challenge and why it matters -- Use code examples to illustrate key implementation decisions -- Explain the reasoning behind technical choices -- Share practical insights from analyzing the actual implementation -``` - -### Technical Reference Blog Post Template (Specification-Oriented) - -``` -Write a comprehensive technical analysis of [system component/API/configuration] that serves as both authoritative reference and engineering insight. - -**Engineering Context:** -- What role this component plays in the overall system -- Why it was designed this way -- How it connects to other system components - -**Technical Deep-Dive:** -1. **Interface Design Analysis** - API design decisions and their implications -2. **Implementation Behavior** - How it actually works under different conditions -3. **Data Architecture** - Schema design and data flow patterns -4. **Configuration Strategy** - Design philosophy behind configuration options -5. **Error Handling Approach** - How errors are managed and communicated -6. 
**Performance Engineering** - Optimization strategies and trade-offs - -**Engineering Analysis:** -- Design patterns and architectural decisions evident in the implementation -- Performance implications and scalability considerations -- Comparison with alternative approaches in the industry -- Evolution potential and extensibility mechanisms - -**Writing Approach:** -- Present specifications with engineering context and rationale -- Include practical usage examples that demonstrate key concepts -- Explain the "why" behind technical design decisions -- Provide both comprehensive coverage and insightful analysis -``` - -### Architectural Insights Blog Post Template (Understanding-Oriented) - -``` -Write an insightful blog post about [system aspect/design decision] that reveals the engineering thinking and architectural wisdom behind the implementation. - -**Engineering Story Setup:** -- The original problem or requirement that drove this design -- Historical context or evolution of the approach -- Alternative solutions that were considered - -**Architectural Analysis:** -1. **Core Design Principles** - The fundamental ideas driving the architecture -2. **Engineering Trade-offs** - Decisions made and their implications -3. **System Integration** - How this fits with other architectural components -4. **Scalability Considerations** - How the design supports growth and change -5. 
**Industry Context** - How this compares to common industry patterns - -**Multiple Engineering Perspectives:** -- Implementation complexity and developer experience -- Performance and operational implications -- Maintainability and evolution considerations -- Business value and user impact - -**Writing Approach:** -- Share the architectural reasoning like explaining design decisions to fellow architects -- Use concrete examples from the codebase to illustrate abstract concepts -- Explain both the benefits and limitations of the chosen approach -- Connect to broader engineering principles and industry best practices -``` - -### Core Functionality Template (Feature-Oriented) - -``` -Document comprehensive understanding of [core feature/functionality] to enable users to fully comprehend and effectively utilize the system's primary capabilities. - -**Feature Overview:** -- Primary purpose and user value proposition -- Key use cases and scenarios where this feature excels -- Integration with other system features and dependencies -- Feature maturity level and development roadmap position - -**Technical Implementation Analysis:** -1. **Core Algorithms & Logic** - Fundamental computational approaches and decision-making processes -2. **Performance Characteristics** - Efficiency, scalability limits, and optimization strategies -3. **Security Considerations** - Access controls, data protection, and security implications -4. **Error Handling & Edge Cases** - Failure modes, recovery mechanisms, and boundary conditions -5. 
**Configuration & Customization** - Available options, tuning parameters, and extensibility points - -**User Experience Integration:** -- How users discover and access this functionality -- Common usage patterns and workflows -- Integration with user interface elements -- Success metrics and user feedback mechanisms - -**Technical Architecture Context:** -- Component dependencies and service interactions -- Data flow patterns and storage requirements -- Communication protocols and API interfaces -- Monitoring, logging, and operational considerations - -**Template Requirements:** -- Balance technical depth with user accessibility -- Document both intended usage and technical implementation -- Include practical examples and real-world scenarios -- Address common questions and misconceptions -``` - -### Technical Deep-Dive Template (Implementation-Oriented) - -``` -Provide comprehensive technical analysis of [system component/architecture] to enable advanced users and contributors to understand and work with complex implementation details. - -**Implementation Architecture:** -- Core design patterns and architectural decisions -- Component structure and interaction protocols -- Data structures, algorithms, and computational complexity -- Performance optimization strategies and trade-offs - -**Technical Analysis Framework:** -1. **Algorithm Implementation** - Detailed analysis of computational approaches and efficiency considerations -2. **Data Management** - Storage patterns, caching strategies, and data consistency mechanisms -3. **Concurrency & Parallelization** - Threading models, synchronization patterns, and parallel processing -4. **Network & Communication** - Protocol implementation, message handling, and distributed system concerns -5. **Resource Management** - Memory usage, CPU optimization, and system resource allocation -6. 
**Security Implementation** - Cryptographic approaches, access controls, and security boundaries - -**Integration Patterns:** -- External system interfaces and communication protocols -- Plugin architecture and extensibility mechanisms -- Configuration management and environment adaptation -- Testing strategies and quality assurance implementation - -**Operational Excellence:** -- Monitoring and observability implementation -- Error reporting and diagnostic capabilities -- Performance metrics and optimization opportunities -- Deployment considerations and operational requirements - -**Template Requirements:** -- Provide implementation-level detail for technical audiences -- Include code examples, diagrams, and architectural illustrations -- Address scalability, maintainability, and extensibility concerns -- Document testing approaches and quality assurance measures -``` - -## Advanced Output Format - -Generate a hierarchical JSON structure with enhanced metadata: - - +**IMPORTANT: ALL generated content, titles, descriptions, and requirements must be written in {{$language}}.** + +## Analysis Framework + +### Project Analysis Requirements + +**Technical Foundation:** +1. **Technology Stack** - Languages, frameworks, dependencies, build tools +2. **Architecture Pattern** - Design patterns, system structure, component relationships +3. **Core Features** - Main functionality, user-facing capabilities, business logic +4. **Implementation Details** - Algorithms, data structures, performance considerations + +**Code Structure Analysis:** +1. **Project Organization** - Directory structure, module separation, dependency flow +2. **Key Components** - Entry points, core classes, service layers, data models +3. **Configuration** - Environment setup, configuration files, deployment requirements +4. **Extension Points** - APIs, plugin systems, customization capabilities + +**Core Component Analysis:** +1. 
**System Modules** - Main application modules, their responsibilities and interactions +2. **Service Architecture** - Business logic services, data access layers, external integrations +3. **Data Models** - Entity structures, database schemas, data flow patterns +4. **API Interfaces** - REST endpoints, GraphQL schemas, internal APIs +5. **Security Components** - Authentication, authorization, security patterns +6. **Performance Components** - Caching, optimization, scalability features + +**Feature Deep-Dive Analysis:** +1. **Primary Features** - Core user-facing functionality with implementation details +2. **Feature Architecture** - How features are structured and implemented in code +3. **Feature Dependencies** - Inter-feature relationships and shared components +4. **Business Logic** - Core algorithms, workflows, and decision-making processes +5. **Integration Patterns** - How features integrate with external systems +6. **Configuration & Customization** - Feature toggles, configuration options, extensibility + +**Core Functionality Breakdown:** +1. **Feature Decomposition** - Break down major features into sub-features and components +2. **Functional Modules** - Identify discrete functional units and their responsibilities +3. **Workflow Analysis** - Map user workflows and system processes step-by-step +4. **Use Case Implementation** - How different use cases are handled in the codebase +5. **Feature Interaction Matrix** - Dependencies and interactions between different features +6. **Performance & Scalability** - How each feature performs and scales under load +7. **Error Handling & Edge Cases** - How features handle failures and boundary conditions +8. 
**Testing & Validation** - Testing strategies and validation mechanisms for each feature + +## Content Generation Strategy + +### Getting Started Guide Content: +- **Project Overview**: Core purpose, technology stack, target users, key benefits +- **Environment Setup**: Prerequisites, installation, configuration, verification steps +- **Core Concepts**: Essential terminology, architectural principles, key abstractions +- **Basic Usage**: First examples, common workflows, fundamental operations +- **Quick Reference**: Essential commands, configurations, troubleshooting guide + +### Deep Dive Analysis Content: +- **Architecture Analysis**: System design, component relationships, data flow patterns +- **Core Components**: Detailed analysis of system modules, services, and data layers +- **Feature Implementation**: In-depth examination of key features and business logic +- **Technical Details**: Algorithms, design patterns, performance optimization +- **Integration & APIs**: External interfaces, plugin systems, extension mechanisms +- **Advanced Topics**: Deployment, scaling, customization, development workflows + +## Output Format + +Generate a hierarchical JSON structure organized into two main modules based on actual project analysis. The structure should dynamically adapt to the repository's specific features and complexity. 
+ +### Dynamic Structure Example: + +```json { "items": [ { "title": "getting-started", - "name": "Getting Started", - "children": [ - { - "title": "overview", - "name": "Project Overview", - "prompt": "[Explanation template for project overview]" - }, - { - "title": "quick-start", - "name": "Quick Start", - "prompt": "[Tutorial template for first successful experience]" - } - ] - }, - { - "title": "tutorials", - "name": "Learn Through Practice", - "description": "Guided learning experiences", - "children": [ - { - "title": "fundamentals", - "name": "Core Concepts", - "children": [ - { - "title": "basic-usage", - "name": "Basic Usage Patterns", - "prompt": "[Tutorial template for basic usage patterns]" - } - ] - } - ] - }, - { - "title": "guides", - "name": "How-to Guides", + "name": "[Getting Started Guide Name]", + "description": "Help users quickly understand and start using the project", "children": [ - { - "title": "setup-deployment", - "name": "Setup & Deployment", - "children": [ - { - "title": "installation", - "name": "Installation Guide", - "prompt": "[How-to template for installation]" - } - ] - } + // Dynamic sections based on project analysis: + // - project-overview (always included) + // - environment-setup (if setup is complex) + // - core-concepts (if project has complex abstractions) + // - basic-usage (always included) + // - quick-reference (if many commands/configurations) ] }, { - "title": "reference", - "name": "Technical Reference", - "description": "Comprehensive specifications", + "title": "deep-dive", + "name": "[Deep Dive Analysis Name]", + "description": "In-depth analysis of core components and functionality", "children": [ - { - "title": "api", - "name": "API Reference", - "prompt": "[Reference template for API specifications]" - } - ] - }, - { - "title": "concepts", - "name": "Understanding the System", - "description": "Deep conceptual knowledge", - "children": [ - { - "title": "architecture", - "name": "System Architecture", - 
"prompt": "[Explanation template for system architecture]" - } + // Dynamic sections based on actual project features: + // - architecture-analysis (always included) + // - core-components (if project has multiple modules/services) + // - feature-implementation (if project has distinct features) + // - technical-implementation (always included) + // - integration-apis (if project has APIs/integrations) ] } ] } - - -## Execution Instructions - -1. **Comprehensive Repository Analysis**: - -- Read and analyze ALL code files thoroughly -- Identify project type, complexity, and architectural patterns -- Map user interaction patterns and use cases - -2. **User-Centered Structure Planning**: - -- Assess primary user personas and their needs -- Design learning pathways and content relationships -- Plan hierarchical organization with appropriate depth - -3. **Adaptive Content Generation**: - -- Scale structure complexity to match project sophistication -- Balance Diátaxis types based on project characteristics -- Create meaningful navigation hierarchies and cross-references - -4. **Enhanced Template Application**: - -- Generate specific, contextual prompts for each section -- Include metadata for improved navigation and discoverability -- Ensure progressive learning pathways and content relationships - -5. 
**Quality Validation**: - -- Verify all titles are concise and user-focused (2-4 words max) -- Confirm structure supports multiple user journeys -- Validate hierarchical organization and cross-references - -## Engineering Blog Series Success Factors - -**Title Generation Standards:** - -- Maximum 2-4 words per title that capture engineering curiosity -- Focus on engineering insights and technical discoveries, not just features -- Use language that appeals to developers and technical professionals -- Maintain professional tone while being engaging and accessible - -**Content Series Quality Requirements:** - -- Logical narrative flow that tells the complete engineering story -- Natural progression from initial discovery to deep technical understanding -- Multiple entry points for developers with different interests and backgrounds -- Rich technical insights that provide value to experienced engineers -- Balanced content types that serve different learning and exploration needs -- Project-appropriate technical depth and engineering focus +``` -**Final Validation:** +### Dynamic Section Generation Rules: + +**For Getting Started Guide:** +- Always include: project-overview, basic-usage +- Include environment-setup if: complex installation, multiple dependencies, configuration required +- Include core-concepts if: project has complex abstractions, domain-specific terminology +- Include quick-reference if: many CLI commands, configuration options, or operational procedures + +**For Deep Dive Analysis:** +- Always include: architecture-analysis, technical-implementation +- Include core-components if: project has multiple modules, services, or distinct components +- Include feature-implementation if: project has identifiable user-facing features or business logic +- Include integration-apis if: project exposes APIs, has plugin system, or external integrations + +**Sub-section Creation:** +- Break down sections into children only when they contain multiple distinct aspects 
+- Create 2-3 nesting levels maximum based on actual complexity +- Each child should represent a meaningful, separable analysis area + +## Section Structure Guidelines + +**Each section must include:** +- `title`: Unique identifier (kebab-case) +- `name`: Display name in {{$language}} +- `requirement`: Specific, actionable generation instruction in {{$language}} +- `children`: Optional array for complex topics requiring detailed breakdown + +**Nesting Levels:** +- **Level 1**: Main sections (overview, setup, analysis, etc.) +- **Level 2**: Sub-topics within main sections (components, features, etc.) +- **Level 3**: Detailed aspects for complex features (algorithms, patterns, etc.) + +**Sub-section Creation Rules — create sub-sections for:** +- System modules with multiple responsibilities +- Complex features requiring component breakdown +- Technical concepts needing layered explanation +- Business logic with multiple workflows +- Integration patterns with various approaches + +## Content Depth Requirements + +### Getting Started Guide Requirements: +- **Project Overview**: Technology stack analysis, architectural overview, core value analysis +- **Environment Setup**: Step-by-step installation, dependency management, configuration validation +- **Core Concepts**: Technical terminology, system abstractions, component relationships +- **Basic Usage**: Practical examples, workflow demonstrations, operational procedures + +### Deep Dive Analysis Requirements: +- **Architecture Analysis**: Design pattern identification, component interaction mapping, scalability analysis +- **Core Components**: + - System module responsibilities and interfaces + - Service layer architecture and dependencies + - Data model relationships and schemas + - API design patterns and endpoints +- **Feature Implementation**: + - Core functionality breakdown with feature decomposition into sub-components + - Business logic and workflow analysis with step-by-step process mapping + - Feature architecture patterns and
structural organization + - Use case implementation analysis and user scenario handling + - Feature interaction matrix and dependency mapping + - Performance characteristics and scalability analysis per feature + - Error handling mechanisms and edge case management + - Testing strategies and validation approaches for each functional module +- **Technical Implementation**: Algorithm complexity, design pattern usage, security implementations +- **Integration & APIs**: External system interfaces, plugin architectures, extension mechanisms -- Content series supports complete developer journey from curiosity to expertise -- Blog post organization feels natural and intellectually satisfying -- Complete repository analysis drives all content decisions and insights -- Engineering blog architecture matches the sophistication of the project being analyzed +## Execution Instructions -Generate engineering blog content catalogs that transform complex software projects into compelling technical stories -that inspire, educate, and provide practical value to fellow developers and engineering professionals. \ No newline at end of file +1. **Repository Analysis**: + - Analyze provided code files to understand project purpose, architecture, and features + - Identify technology stack, core components, and implementation patterns + - Extract key functionality, business logic, and technical design decisions + - Map system modules, service layers, data models, and API interfaces + +2. 
**Documentation Structure Generation**: + - Dynamically create "Getting Started Guide" with 3-6 sections based on actual project needs + - Dynamically create "Deep Dive Analysis" with 3-7 sections based on project complexity and features + - Adapt nesting levels (2-3 levels) based on actual component complexity, not fixed structure + - Only include sections that are relevant to the actual project (don't force unnecessary sections) + - Create sub-sections only when the parent section contains multiple distinct, separable aspects + - Structure should reflect the project's actual organization and feature set + +3. **Requirements Generation**: + - Create specific, actionable requirements based on what actually exists in the project + - Tailor analysis depth to match the actual complexity of each component or feature + - Generate requirements that reflect the project's specific technology stack and patterns + - Only demand feature decomposition if the project actually has complex, multi-part features + - Adapt workflow analysis requirements to the project's actual business processes + - Scale technical depth requirements based on the project's actual implementation sophistication + - Ensure all requirements focus on real, identifiable elements in the codebase + - Requirements should be written in {{$language}} and match the project's domain + +4. 
**Quality Assurance**: + - Verify that the structure accurately reflects the actual project's architecture and features + - Ensure no unnecessary sections are included (only what's relevant to this specific project) + - Validate that complexity levels match the real sophistication of the codebase + - Confirm that all sections and requirements are grounded in actual code analysis + - Ensure logical progression that matches the project's natural learning curve + - Validate that technical depth is appropriate for the project's actual implementation level + +## Success Criteria + +**Documentation Quality:** +- Deep technical analysis of actual project components and implementations +- Comprehensive coverage of system modules, services, data models, and APIs +- Detailed feature decomposition with sub-component analysis and functional module breakdown +- Thorough examination of core functionality, business logic, workflows, and algorithms +- Complete use case implementation analysis and feature interaction mapping +- Clear progression from basic understanding to advanced implementation details +- Practical examples and real code analysis with architectural insights + +**Structure Balance:** +- Getting Started Guide provides solid foundation with core concepts and basic usage +- Deep Dive Analysis delivers exhaustive technical understanding of all major components +- Core Components section thoroughly covers system modules, services, and data architecture +- Feature Implementation section provides detailed analysis of business logic and workflows +- Core Functionality Breakdown delivers comprehensive feature decomposition and module analysis +- Clear boundaries between foundational knowledge and advanced technical implementation + +**Technical Coverage:** +- Complete analysis of project's core technology stack and architectural decisions +- Detailed breakdown of system components and their responsibilities +- Comprehensive feature analysis with implementation patterns, 
business logic, and workflow mapping +- Detailed functional module breakdown with use case implementations and interaction analysis +- Technical implementation details including algorithms, patterns, and optimizations +- Integration analysis covering APIs, external systems, and extension mechanisms + +Generate comprehensive documentation catalogs that thoroughly analyze a project's core components, feature implementations, and technical architecture while serving both newcomers seeking a solid understanding and experienced developers requiring detailed technical analysis. \ No newline at end of file diff --git a/src/KoalaWiki/Prompts/Warehouse/GenerateDocs.md b/src/KoalaWiki/Prompts/Warehouse/GenerateDocs.md index 893b6733..3bd2a1c0 100644 --- a/src/KoalaWiki/Prompts/Warehouse/GenerateDocs.md +++ b/src/KoalaWiki/Prompts/Warehouse/GenerateDocs.md @@ -68,26 +68,43 @@ Generate content following identified Diátaxis type requirements: - Maintain focus on user needs for the specific Diátaxis quadrant - Ensure content meets minimum length requirements (1000+ characters) with substantial analysis -### STEP 3: BLOG FORMAT OUTPUT (MANDATORY) -ALL final content MUST be wrapped in `<blog>` tags containing: -- Complete, detailed documentation content -- Comprehensive technical analysis -- All required Mermaid diagrams -- Proper citation references -- Professional formatting in {{$language}} +### STEP 3: DOCUMENT GENERATION TOOL CONTENT CREATION (MANDATORY) +**CRITICAL REQUIREMENT**: ALL content generation MUST use the provided document generation tools exclusively. NEVER output content directly. + +**MANDATORY TOOL-BASED WORKFLOW:** +1. **Initialize with Write Tool**: Use the Write() function to create the initial document structure +2. **Iterative Content Building**: Use the Edit() function to progressively build complete sections +3. **Content Verification**: Use the Read() function to verify content before proceeding +4.
**Final Assembly**: Ensure all content is created through tool calls only + +**CONTENT CREATION PROTOCOL:** +- Start with Write() containing document outline and first section +- Use multiple Edit() calls to add each major section systematically +- Each Edit() call should add substantial content (at least 500 words, typically 500-1000) +- Include all Mermaid diagrams within tool operations +- Add all citation references through Edit() operations +- NEVER output final content directly - all content must exist within the document generation system + +**FINAL TOOL-BASED CONTENT REQUIREMENTS:** +- Complete, detailed documentation content created through Write() and Edit() operations +- Comprehensive technical analysis added via tool operations +- All required Mermaid diagrams included through Edit() operations +- Proper citation references added via tool operations +- Professional formatting in {{$language}} maintained through document generation tools # DIÁTAXIS DOCUMENTATION REQUIREMENTS ## CORE DIRECTIVES **ESSENTIAL REQUIREMENTS:** -1. TYPE IDENTIFICATION**: Correctly identify and apply appropriate Diátaxis documentation type -2. USER-CENTERED ANALYSIS**: Use `<thinking>` tags for Diátaxis-guided repository analysis -3. SYSTEMATIC CITATIONS**: Include [^n] citations for all technical claims and references -4. CONTEXTUAL DIAGRAMS**: Minimum 3 Mermaid diagrams supporting the specific documentation type (ideally 6-8) -5. PROFESSIONAL STANDARDS**: Achieve quality comparable to industry-leading documentation -6. STRUCTURED OUTPUT**: Final content in `<blog>` tags with Chinese formatting -7. TYPE CONSISTENCY**: Maintain consistency with chosen Diátaxis type throughout +1. **DOCUMENT GENERATION TOOL USAGE**: Exclusively use available document generation tools for ALL content creation - NEVER output content directly +2. **TYPE IDENTIFICATION**: Correctly identify and apply appropriate Diátaxis documentation type +3. **USER-CENTERED ANALYSIS**: Use `<thinking>` tags for Diátaxis-guided repository analysis +4.
**SYSTEMATIC CITATIONS**: Include [^n] citations for all technical claims and references via Edit() operations +5. **CONTEXTUAL DIAGRAMS**: Minimum 3 Mermaid diagrams supporting the specific documentation type (ideally 6-8) added through Edit() operations +6. **PROFESSIONAL STANDARDS**: Achieve quality comparable to industry-leading documentation through tool operations +7. **TOOL-BASED CONTENT CREATION**: Final content must exist entirely within the document generation system - no direct output allowed +8. **TYPE CONSISTENCY**: Maintain consistency with chosen Diátaxis type throughout tool-based content creation # CORE DIRECTIVES @@ -940,13 +957,13 @@ Generate documentation that demonstrates technical excellence through systematic - **Code as Evidence**: Use strategic code examples to support your analysis and explanations, showing the actual implementation that backs up your technical insights **TECHNICAL OUTPUT FORMAT REQUIREMENTS:** -- Wrap all content in `` tags -- Maintain professional technical writing standards -- **MANDATORY TECHNICAL MERMAID DIAGRAMS**: Include minimum 6-8 comprehensive Mermaid diagrams throughout the documentation, with each major section containing relevant technical architecture, workflow, or component diagrams -- **Technical Process Visualization**: Every significant technical process, design pattern, or architectural excellence must be visualized with appropriate Mermaid diagrams -- **Technical-to-Code Mapping**: Ensure every diagram element represents actual technical implementations and patterns found in the analyzed files -- **STRATEGIC CODE EXAMPLES**: Include essential code examples for critical usage patterns, properly contextualized within comprehensive technical analysis -- **MANDATORY CITATION SYSTEM**: Integrate footnote citations [^n] with proper file references formatted as: `[^n]: [Technical Description]({{$git_repository}}/tree/{{$branch}}/path/file#Lstart-Lend)` +- **MANDATORY TOOL USAGE**: ALL content creation must 
use document generation tools exclusively +- **NO DIRECT OUTPUT ALLOWED**: Never output documentation content directly in response +- **TOOL-BASED CONTENT STRUCTURE**: Create content through Write() and Edit() operations +- **TOOL-BASED MERMAID INTEGRATION**: Include minimum 6-8 comprehensive Mermaid diagrams through Edit() operations +- **TOOL-BASED TECHNICAL VISUALIZATION**: Every significant technical process must be visualized through tool operations +- **TOOL-BASED CODE REFERENCES**: Ensure every diagram element represents actual implementations through tool operations +- **TOOL-BASED CITATION SYSTEM**: Integrate footnote citations [^n] with proper file references through Edit() operations ## Technical Citation Implementation Guidelines @@ -1266,16 +1283,14 @@ Write technical content that demonstrates the perspective of a senior engineer w Generate compelling engineering blog content that reads like an experienced developer's deep exploration of an interesting codebase. Create technical storytelling that reveals the engineering insights, architectural decisions, and implementation wisdom that other developers would find valuable and inspiring. Focus on the human story behind the code - the problems being solved, the clever solutions employed, and the engineering craftsmanship that makes this project worth understanding and learning from. -**FINAL OUTPUT REQUIREMENT**: The final result MUST be COMPLETE documentation content wrapped in `` tags, written in {{$language}}, following the identified Diátaxis type, with minimum 3 contextual Mermaid diagrams, proper citations, and professional formatting. +**FINAL OUTPUT REQUIREMENT**: The final result MUST be complete documentation content created entirely through available document generation tools. Use Write() to initialize and Edit() to build comprehensive content in {{$language}}, following the identified Diátaxis type, with minimum 3 contextual Mermaid diagrams, proper citations, and professional formatting. 
-**CRITICAL OUTPUT FORMAT**: -- ALL final content MUST be wrapped in `` tags -- NO content should appear outside these tags in the final response -- The `` opening tag should be on its own line -- The `` closing tag should be on its own line -- Content inside should be properly formatted {{$language}} documentation -- `` tags and their content will be automatically removed from final output -- Use `` tags only for analysis and planning, never for content meant to be visible +**CRITICAL TOOL-BASED OUTPUT PROTOCOL**: +- **NO DIRECT CONTENT OUTPUT**: Never output documentation content directly in your response +- **EXCLUSIVE TOOL USAGE**: ALL content must be created through Write() and Edit() operations +- **PROGRESSIVE CONTENT BUILDING**: Use multiple Edit() calls to systematically build complete documentation +- **TOOL-BASED CONTENT VERIFICATION**: Use Read() to verify content structure and completeness +- **TOOL-BASED CONTENT CREATION**: The user will access the final documentation through the document generation system, not through direct response output --- @@ -1289,10 +1304,10 @@ Generate compelling engineering blog content that reads like an experienced deve 4. **Type Purity**: No mixing of different documentation types within content **Content Quality Standards:** -5. **Contextual Diagrams**: Minimum 3 Mermaid diagrams appropriate for documentation type -6. **Systematic Citations**: [^n] references for all technical claims -7. **Repository Grounding**: Content based on actual code analysis -8. **Proper Formatting**: {{$language}} content wrapped in `` tags +5. **Contextual Diagrams**: Minimum 3 Mermaid diagrams appropriate for documentation type added through Edit() operations +6. **Systematic Citations**: [^n] references for all technical claims added via tool operations +7. **Repository Grounding**: Content based on actual code analysis created through document generation tools +8. 
**Tool-Based Content Creation**: {{$language}} content created entirely through Write() and Edit() operations This Diátaxis-optimized approach ensures documentation truly serves user intent and provides maximum value within the specific user context. diff --git a/src/KoalaWiki/Prompts/Warehouse/GenerateMindMap.md b/src/KoalaWiki/Prompts/Warehouse/GenerateMindMap.md index 14002ed4..ae21f647 100644 --- a/src/KoalaWiki/Prompts/Warehouse/GenerateMindMap.md +++ b/src/KoalaWiki/Prompts/Warehouse/GenerateMindMap.md @@ -1,9 +1,27 @@  You are an Expert Code Architecture Analyst specializing in transforming complex repositories into intelligent, navigable knowledge graphs. Your expertise lies in architectural pattern recognition, system design analysis, and creating structured representations that reveal both explicit structure and implicit design wisdom. + +You have deep understanding of modern .NET application patterns, including: +- Layered Architecture (Domain, Service, Infrastructure layers) +- Document Processing Pipelines and Orchestration patterns +- Semantic Kernel AI integration patterns +- Entity Framework Core with multi-provider architecture +- ASP.NET Core with background services and middleware +- Microservices with Aspire orchestration +- Git repository analysis and code intelligence systems Generate a comprehensive architectural mind map that serves as both a navigation tool and knowledge base for understanding the repository's design philosophy, component relationships, and implementation strategies. 
+ +Focus on revealing: +- Multi-layered architecture patterns (Domain-Service-Infrastructure) +- AI/ML integration patterns with Semantic Kernel +- Document processing workflows and pipeline orchestration +- Code analysis and semantic understanding systems +- Git integration and repository management patterns +- Multi-database provider abstraction patterns +- Background processing and workflow orchestration @@ -16,29 +34,95 @@ Codebase: {{$code_files}} ### Phase 1: Architectural Intelligence Extraction -1. **Design Philosophy Recognition**: Identify the core architectural principles (microservices, modular monolith, layered, etc.) -2. **Pattern Detection**: Recognize design patterns, architectural styles, and structural conventions -3. **Technology Stack Analysis**: Understand the rationale behind technology choices and their interdependencies -4. **System Boundaries**: Map service boundaries, module interfaces, and integration points -5. **Quality Attributes**: Assess scalability, maintainability, testability, and security considerations +1. **Design Philosophy Recognition**: Identify core architectural principles (layered architecture, domain-driven design, pipeline patterns) +2. **Pattern Detection**: Recognize key patterns including: + - Repository Pattern with Entity Framework Core + - Pipeline Processing with Orchestration + - Service Layer Pattern with Dependency Injection + - Multi-Provider Database Abstraction + - Background Service Processing + - Semantic Kernel AI Integration +3. **Technology Stack Analysis**: Focus on .NET 9.0 ecosystem including: + - ASP.NET Core Web API with OpenAPI/Scalar + - Entity Framework Core with multi-database providers + - Semantic Kernel for AI functionality + - LibGit2Sharp for Git operations + - Aspire for application orchestration + - Serilog for structured logging +4. 
**System Boundaries**: Map key boundaries: + - Domain layer (entities and business logic) + - Service layer (application services and orchestration) + - Infrastructure layer (data access, external integrations) + - API layer (controllers and endpoints) + - Background processing layer +5. **Quality Attributes**: Assess: + - Scalability through background processing and pipelines + - Maintainability through layered architecture + - Extensibility through provider patterns and dependency injection + - Observability through structured logging and activities ### Phase 2: Relationship Network Mapping -1. **Dependency Networks**: Map compile-time, runtime, and logical dependencies -2. **Data Flow Analysis**: Trace how information flows through the system -3. **Control Flow Patterns**: Understand execution paths and system behavior -4. **Interface Contracts**: Analyze APIs, protocols, and communication patterns -5. **Configuration Dependencies**: Identify environment and deployment relationships +1. **Dependency Networks**: Map key dependency patterns: + - Domain entities → Service layer → Infrastructure layer + - Pipeline orchestration → Processing steps → Semantic Kernel services + - Controllers → Services → Repository providers + - Background services → Document processing workflows +2. **Data Flow Analysis**: Trace critical flows: + - Git repository ingestion → Document processing pipeline → Knowledge generation + - User requests → Service orchestration → Database operations + - AI model interactions → Semantic Kernel → Response generation + - File processing → Code analysis → Documentation generation +3. **Control Flow Patterns**: Key execution paths: + - Document processing orchestration with resilient execution + - Background task processing with queue management + - AI service integration with retry policies + - Multi-database provider switching +4. 
**Interface Contracts**: Critical interfaces: + - IKoalaWikiContext for data access abstraction + - IDocumentProcessingPipeline for workflow orchestration + - ILanguageParser for code analysis + - Semantic Kernel function interfaces +5. **Configuration Dependencies**: Environment considerations: + - Multi-database provider configuration (SQLite, PostgreSQL, MySQL, SQL Server) + - AI model configuration (OpenAI, Anthropic via Semantic Kernel) + - Git authentication and repository access + - Aspire orchestration and service discovery ### Phase 3: Conceptual Model Construction -1. **Domain Model Identification**: Extract business concepts and domain entities -2. **Responsibility Mapping**: Understand how concerns are separated and responsibilities distributed -3. **Abstraction Layers**: Identify levels of abstraction and their purposes -4. **Extension Mechanisms**: Find customization points and plugin architectures -5. **Evolution Patterns**: Recognize how the system is designed to grow and adapt +1. **Domain Model Identification**: Core business concepts: + - Warehouse (Git repository container with metadata) + - Document (processed repository documentation) + - DocumentCatalog/DocumentFileItem (hierarchical content structure) + - User/Role/Permission (access control and authorization) + - Statistics/AccessRecord (analytics and usage tracking) + - FineTuning/TrainingDataset (AI model customization) +2. **Responsibility Mapping**: Clear separation of concerns: + - Domain Layer: Pure business entities and rules + - Service Layer: Application logic and orchestration + - Infrastructure Layer: Data persistence and external integrations + - Pipeline Layer: Document processing workflows + - Background Services: Asynchronous processing +3. 
**Abstraction Layers**: Multi-level abstractions: + - Database provider abstraction (IKoalaWikiContext) + - Language parsing abstraction (ILanguageParser, ISemanticAnalyzer) + - AI service abstraction (Semantic Kernel integration) + - Processing step abstraction (IDocumentProcessingStep) +4. **Extension Mechanisms**: Designed for extensibility: + - Provider pattern for database backends + - Plugin system for language parsers and analyzers + - Pipeline step registration for custom processing + - Semantic Kernel function registration + - MCP (Model Context Protocol) tool integration +5. **Evolution Patterns**: Growth and adaptation strategies: + - Modular pipeline architecture for adding new processing steps + - Multi-provider pattern for supporting new databases + - Language parser extensibility for new programming languages + - AI model provider flexibility through Semantic Kernel + - Background service scalability for increased processing demands ## Output Format Specifications @@ -81,24 +165,56 @@ Codebase: {{$code_files}} ## Intelligent Analysis Process -Before generating output, perform deep architectural analysis: - -1. **Repository Context Assessment**: What type of system is this? What are its primary concerns? -2. **Architectural Pattern Recognition**: What patterns and principles govern this system? -3. **Component Significance Ranking**: Which components are architecturally most important? -4. **Relationship Importance**: What are the most critical system relationships? -5. **User Navigation Needs**: How would someone best explore and understand this system? - -Consider multiple perspectives: developer onboarding, system maintenance, feature development, and architectural evolution. +Before generating output, perform deep architectural analysis considering KoalaWiki's specific characteristics: + +1. 
**Repository Context Assessment**: KoalaWiki is an AI-powered documentation and knowledge management system that: + - Processes Git repositories to generate intelligent documentation + - Provides code analysis and semantic understanding + - Offers multi-language support with extensible parsing + - Integrates AI models through Semantic Kernel + - Supports multiple database backends + +2. **Architectural Pattern Recognition**: Key patterns governing this system: + - **Layered Architecture**: Clear separation between Domain, Service, and Infrastructure layers + - **Pipeline Pattern**: Document processing through orchestrated steps + - **Provider Pattern**: Multi-database and multi-language support + - **Background Processing**: Asynchronous document processing workflows + - **Repository Pattern**: Data access abstraction with EF Core + - **Dependency Injection**: Service composition and lifecycle management + +3. **Component Significance Ranking**: Most architecturally important components: + - **KoalaWarehouse Pipeline**: Core document processing orchestration + - **Domain Entities**: Business model foundation (Warehouse, Document, etc.) + - **Service Layer**: Application logic and business workflows + - **Code Analysis System**: Language parsing and semantic analysis + - **AI Integration**: Semantic Kernel and model management + - **Multi-Provider Data Access**: Database abstraction layer + +4. **Relationship Importance**: Critical system relationships: + - Document processing pipeline orchestration flows + - Domain entity relationships and data consistency + - AI service integration and prompt management + - Git repository analysis and code intelligence + - Background service coordination and task management + - Multi-database provider switching and configuration + +5. 
**User Navigation Needs**: System exploration priorities: + - Start with domain concepts to understand business model + - Explore pipeline architecture for core functionality + - Understand service layer for application logic + - Examine AI integration for intelligent features + - Review extension mechanisms for customization + +Consider perspectives: developer onboarding (domain-first), system maintenance (service patterns), feature development (pipeline extension), architectural evolution (provider patterns). ### Execution Strategy -1. **Rapid Architecture Scan**: Quickly identify the system's primary architectural approach -2. **Critical Path Analysis**: Focus on the most important components and relationships first -3. **Layered Decomposition**: Break down from system level to implementation details -4. **Cross-Reference Validation**: Ensure all major components and relationships are captured -5. **Navigation Optimization**: Structure for intuitive exploration and understanding +1. **KoalaWiki Architecture Scan**: Identify the layered architecture with pipeline processing core +2. **Critical Path Analysis**: Focus on document processing workflows, domain entities, and AI integration first +3. **Layered Decomposition**: Structure as Domain → Service → Infrastructure → Pipeline → AI layers +4. **Component Relationship Mapping**: Trace flows from Git ingestion through AI processing to documentation output +5. **Navigation Optimization**: Organize for developer understanding of both business concepts and technical implementation ## Quality Assurance diff --git a/src/KoalaWiki/Services/DocumentCatalogService.cs b/src/KoalaWiki/Services/DocumentCatalogService.cs index cb93d310..13b726ff 100644 --- a/src/KoalaWiki/Services/DocumentCatalogService.cs +++ b/src/KoalaWiki/Services/DocumentCatalogService.cs @@ -96,6 +96,9 @@ public async Task GetDocumentCatalogsAsync(string organizationName, stri public async Task GetDocumentByIdAsync(string owner, string name, string? 
branch, string path, string? languageCode, HttpContext httpContext) { + // URL解码,处理包含特殊字符(如日文字符)的路径 + var decodedPath = System.Web.HttpUtility.UrlDecode(path); + // 先根据仓库名称和组织名称找到仓库 var warehouse = await dbAccess.Warehouses .AsNoTracking() @@ -112,7 +115,7 @@ public async Task GetDocumentByIdAsync(string owner, string name, string? branch // 找到catalog var id = await dbAccess.DocumentCatalogs .AsNoTracking() - .Where(x => x.WarehouseId == warehouse.Id && x.Url == path && x.IsDeleted == false) + .Where(x => x.WarehouseId == warehouse.Id && x.Url == decodedPath && x.IsDeleted == false) .Select(x => x.Id) .FirstOrDefaultAsync(); @@ -169,9 +172,23 @@ private object ToFileSource(DocumentFileItemSource fileItemSource, Warehouse? wa if (warehouse.Address.StartsWith("https://github.com/") || warehouse.Address.StartsWith("https://gitee.com/")) { - url = warehouse.Address.TrimEnd('/') + $"/tree/{warehouse.Branch}/" + fileItemSource.Address; + // 删除.git后缀 + url = warehouse.Address + .Replace(".git", string.Empty) + .TrimEnd('/') + $"/tree/{warehouse.Branch}/" + fileItemSource.Address; } - + // TODO: 兼容其他提供商 + else if(warehouse.Address.StartsWith("https://gitlab.com/")) + { + url = warehouse.Address + .Replace(".git", string.Empty) + .TrimEnd('/') + $"/-/tree/{warehouse.Branch}/" + fileItemSource.Address; + } + else + { + url = warehouse.Address.TrimEnd('/') + "/" + fileItemSource.Address; + } + var name = Path.GetFileName(fileItemSource.Address); return new diff --git a/src/KoalaWiki/plugins/CodeAnalysis/CodeDirSimplifier/config.json b/src/KoalaWiki/plugins/CodeAnalysis/CodeDirSimplifier/config.json index 56b253a4..b188d52f 100644 --- a/src/KoalaWiki/plugins/CodeAnalysis/CodeDirSimplifier/config.json +++ b/src/KoalaWiki/plugins/CodeAnalysis/CodeDirSimplifier/config.json @@ -35,7 +35,11 @@ "max_tokens": 8192 , "temperature": 0.5 }, - "gemini-2.5-pro-preview-05-06": { + "gemini-2.5-pro": { + "max_tokens": 16384 , + "temperature": 0.5 + }, + 
"gemini-2.5-flash": { "max_tokens": 16384 , "temperature": 0.5 }, diff --git a/src/KoalaWiki/plugins/CodeAnalysis/CommitAnalyze/config.json b/src/KoalaWiki/plugins/CodeAnalysis/CommitAnalyze/config.json index c4c0bc31..ee39feb5 100644 --- a/src/KoalaWiki/plugins/CodeAnalysis/CommitAnalyze/config.json +++ b/src/KoalaWiki/plugins/CodeAnalysis/CommitAnalyze/config.json @@ -35,7 +35,11 @@ "max_tokens": 8192 , "temperature": 0.5 }, - "gemini-2.5-pro-preview-05-06": { + "gemini-2.5-pro": { + "max_tokens": 16384 , + "temperature": 0.5 + }, + "gemini-2.5-flash": { "max_tokens": 16384 , "temperature": 0.5 }, diff --git a/src/KoalaWiki/plugins/CodeAnalysis/GenerateReadme/config.json b/src/KoalaWiki/plugins/CodeAnalysis/GenerateReadme/config.json index e79979f0..12ee22f2 100644 --- a/src/KoalaWiki/plugins/CodeAnalysis/GenerateReadme/config.json +++ b/src/KoalaWiki/plugins/CodeAnalysis/GenerateReadme/config.json @@ -35,10 +35,14 @@ "max_tokens": 8192 , "temperature": 0.5 }, - "gemini-2.5-pro-preview-05-06": { + "gemini-2.5-pro": { "max_tokens": 65536 , "temperature": 0.5 }, + "gemini-2.5-flash": { + "max_tokens": 16384 , + "temperature": 0.5 + }, "Qwen/Qwen3-235B-A22B": { "max_tokens": 32768 , "temperature": 0.5 diff --git a/web/app/[owner]/[name]/page.tsx b/web/app/[owner]/[name]/page.tsx index 31aa5a87..00ab0656 100644 --- a/web/app/[owner]/[name]/page.tsx +++ b/web/app/[owner]/[name]/page.tsx @@ -36,8 +36,8 @@ export default async function RepositoryPage({ params, searchParams }: any) { if (catalogResponse.success && catalogResponse.data?.items && catalogResponse.data.items.length > 0) { const firstMenuItem = catalogResponse.data.items[0] if (firstMenuItem?.url) { - // 重定向到第一个菜单项 - redirect(`/${owner}/${name}/${firstMenuItem.url}`); + // 重定向到第一个菜单项,必须编码以避免日文等字符导致的redirect错误 + redirect(`/${owner}/${name}/${encodeURIComponent(firstMenuItem.url)}`); } } } catch (error) { diff --git a/web/app/services/warehouseService.ts b/web/app/services/warehouseService.ts index 
44a73be5..1bead980 100644 --- a/web/app/services/warehouseService.ts +++ b/web/app/services/warehouseService.ts @@ -339,13 +339,13 @@ export async function getWarehouse(page: number, pageSize: number, keyword?: str * 此函数可在服务器组件中使用 */ export async function documentCatalog(organizationName: string, name: string, branch?: string, languageCode?: string): Promise { - // 构建URL,如果branch存在则添加到查询参数中 - let url = API_URL + '/api/DocumentCatalog/DocumentCatalogs?organizationName=' + organizationName + '&name=' + name; + // 构建URL,如果branch存在则添加到查询参数中,并确保所有参数都被正确编码以处理特殊字符 + let url = API_URL + '/api/DocumentCatalog/DocumentCatalogs?organizationName=' + encodeURIComponent(organizationName) + '&name=' + encodeURIComponent(name); if (branch) { - url += '&branch=' + branch; + url += '&branch=' + encodeURIComponent(branch); } if (languageCode) { - url += '&languageCode=' + languageCode; + url += '&languageCode=' + encodeURIComponent(languageCode); } // @ts-ignore @@ -361,13 +361,13 @@ export async function documentCatalog(organizationName: string, name: string, br * 此函数可在服务器组件中使用 */ export async function documentById(owner: string, name: string, path: string, branch?: string, languageCode?: string): Promise { - // 构建URL,如果branch存在则添加到查询参数中 - let url = API_URL + '/api/DocumentCatalog/DocumentById?owner=' + owner + '&name=' + name + '&path=' + path; + // 构建URL,如果branch存在则添加到查询参数中,并对除path外的参数进行编码(path在后端进行解码处理) + let url = API_URL + '/api/DocumentCatalog/DocumentById?owner=' + encodeURIComponent(owner) + '&name=' + encodeURIComponent(name) + '&path=' + path; if (branch) { - url += '&branch=' + branch; + url += '&branch=' + encodeURIComponent(branch); } if (languageCode) { - url += '&languageCode=' + languageCode; + url += '&languageCode=' + encodeURIComponent(languageCode); } // @ts-ignore diff --git a/web/app/settings/page.tsx b/web/app/settings/page.tsx index 9c105ed2..90b9f291 100644 --- a/web/app/settings/page.tsx +++ b/web/app/settings/page.tsx @@ -14,7 +14,8 @@ import { Save, 
Camera, CheckCircle, - Upload + Upload, + Loader2 } from 'lucide-react'; import { Button } from '@/components/ui/button'; @@ -100,6 +101,7 @@ export default function SettingsPage() { const [activeSection, setActiveSection] = useState('profile'); const [loading, setLoading] = useState(false); + const [avatarUploading, setAvatarUploading] = useState(false); const [userInfo, setUserInfo] = useState(null); const [avatarUrl, setAvatarUrl] = useState(''); @@ -221,7 +223,7 @@ export default function SettingsPage() { } try { - setLoading(true); + setAvatarUploading(true); const { data } = await uploadAvatar(file) as any; if (data.code === 200 && data.data) { // 添加时间戳参数到头像URL以防止浏览器缓存 @@ -251,7 +253,7 @@ export default function SettingsPage() { console.error('头像上传失败:', error); toast.error('头像上传失败,请重试'); } finally { - setLoading(false); + setAvatarUploading(false); } return false; @@ -287,31 +289,39 @@ export default function SettingsPage() { case 'profile': return (
-
-

账户信息

-

管理您的个人资料和基本信息

+
+

账户信息

+

管理您的个人资料和基本信息

-
+
{/* 头像卡片 */} - - -
-
- - - - - - + + +
+
+
+ + + + + + + {avatarUploading && ( +
+ +
+ )} +
-
-

{userInfo?.name}

-

{userInfo?.email}

- +
+
+

{userInfo?.name}

+

{userInfo?.email}

+
+ 已验证 +

+ 支持 JPG、PNG、GIF 格式,文件大小不超过 2MB +

{/* 基本信息表单 */} - - - 基本信息 + + + + + 基本信息 + + + 管理您的个人资料信息 + - +
- -
+ +
( - - 用户名 + + 用户名 - + @@ -362,20 +391,39 @@ export default function SettingsPage() { control={profileForm.control} name="email" render={({ field }) => ( - - 邮箱地址 + + 邮箱地址 - + )} />
- +
+ +
@@ -387,71 +435,107 @@ export default function SettingsPage() { case 'security': return (
-
-

安全设置

-

管理您的账户安全和密码设置

+
+

安全设置

+

管理您的账户安全和密码设置

-
- - - - +
+ + + + 修改密码 - - 定期更改密码有助于保护您的账户安全 + + 定期更改密码有助于保护您的账户安全。请确保使用强密码。 - +
- - ( - - 当前密码 - - - - - - )} - /> - ( - - 新密码 - - - - - 密码必须至少8个字符,包含大小写字母和数字 - - - - )} - /> - ( - - 确认新密码 - - - - - - )} - /> - + +
+ ( + + 当前密码 + + + + + + )} + /> + +
+ ( + + 新密码 + + + + + 密码必须至少8个字符,包含大小写字母和数字 + + + + )} + /> + ( + + 确认新密码 + + + + + + )} + /> +
+
+ +
+ +
@@ -463,12 +547,62 @@ export default function SettingsPage() { case 'apps': return (
-
-

应用管理

-

管理与您账户连接的应用

+
+

应用管理

+

管理与您账户连接的应用

-
- +
+ + + + + 应用管理 + + + 管理您的应用偏好设置和通知选项 + + + +
+
+
+ +

+ 接收重要更新和通知邮件 +

+
+ +
+ +
+
+ +

+ 在桌面显示实时通知 +

+
+ +
+ +
+
+ +

+ 自动保存您的工作进度 +

+
+ +
+
+ +
+
+ + 设置将自动保存 +
+
+
+
); @@ -507,28 +641,29 @@ export default function SettingsPage() { {/* Main content */} - + {/* 顶部导航栏 */} -
-
- +
+
+ +
-

设置

+

设置

{/* 内容区域 */} -
+
{loading && (