diff --git a/.github/workflows/publish-gem.yml b/.github/workflows/publish-gem.yml new file mode 100644 index 0000000..db6ea75 --- /dev/null +++ b/.github/workflows/publish-gem.yml @@ -0,0 +1,31 @@ +# This workflow is triggered when a GitHub release is created. +# It can also be run manually to re-publish to rubygems.org in case it failed for some reason. +# You can run this workflow by navigating to https://www.github.com/ScrapeGraphAI/scrapegraphai-ruby/actions/workflows/publish-gem.yml +name: Publish Gem +on: + workflow_dispatch: + + release: + types: [published] + +jobs: + publish: + name: publish + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + bundler-cache: false + - run: |- + bundle install + + - name: Publish to RubyGems.org + run: | + bash ./bin/publish-gem + env: + # `RUBYGEMS_HOST` is only required for private gem repositories, not https://rubygems.org + RUBYGEMS_HOST: ${{ secrets.SCRAPEGRAPHAI_RUBYGEMS_HOST || secrets.RUBYGEMS_HOST }} + GEM_HOST_API_KEY: ${{ secrets.SCRAPEGRAPHAI_GEM_HOST_API_KEY || secrets.GEM_HOST_API_KEY }} diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml new file mode 100644 index 0000000..c8e8fe8 --- /dev/null +++ b/.github/workflows/release-doctor.yml @@ -0,0 +1,22 @@ +name: Release Doctor +on: + pull_request: + branches: + - main + workflow_dispatch: + +jobs: + release_doctor: + name: release doctor + runs-on: ubuntu-latest + if: github.repository == 'ScrapeGraphAI/scrapegraphai-ruby' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') + + steps: + - uses: actions/checkout@v4 + + - name: Check release environment + run: | + bash ./bin/check-release-environment + env: + RUBYGEMS_HOST: ${{ secrets.SCRAPEGRAPHAI_RUBYGEMS_HOST || secrets.RUBYGEMS_HOST }} + GEM_HOST_API_KEY: ${{ secrets.SCRAPEGRAPHAI_GEM_HOST_API_KEY || secrets.GEM_HOST_API_KEY }} diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 0000000..3d2ac0b --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "0.1.0" +} \ No newline at end of file diff --git a/.rubocop.yml b/.rubocop.yml index 0526c49..5707b00 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://www.rubyschema.org/rubocop.json --- # Explicitly disable pending cops for now. This is the default behaviour but # this avoids a large warning every time we run it. @@ -8,7 +9,7 @@ AllCops: - "bin/*" NewCops: enable SuggestExtensions: false - TargetRubyVersion: 3.2.0 + TargetRubyVersion: 3.2 # Whether MFA is required or not should be left to the token configuration. 
Gemspec/RequireMFA: @@ -120,6 +121,10 @@ Metrics/BlockLength: Metrics/ClassLength: Enabled: false +Metrics/CollectionLiteralLength: + Exclude: + - "test/**/*" + Metrics/CyclomaticComplexity: Enabled: false @@ -149,7 +154,7 @@ Naming/ClassAndModuleCamelCase: Naming/MethodParameterName: Enabled: false -Naming/PredicateName: +Naming/PredicatePrefix: Exclude: - "**/*.rbi" diff --git a/.stats.yml b/.stats.yml index 636c6f9..6804ffb 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/scrapegraphai%2Fscrapegraphai-969ebada41127057e4cda129b2e7206224743b5c7fd33aa8ae062ff71b775ac9.yml -openapi_spec_hash: 2b2c2c684e6f6885398efca5f2b1f854 -config_hash: 30d69c79e34a1ea6a0405573ce30d927 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/scrapegraphai%2Fscrapegraphai-633fdeab6abaefbe666099e8f86ce6b2acc9dacff1c33a80813bb04e8e437229.yml +openapi_spec_hash: f41ec90694ca8e7233bd20cc7ff1afbf +config_hash: 6889576ba0fdc14f2c71cea09a60a0f6 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3bb17dc --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,35 @@ +# Changelog + +## 0.1.0 (2025-10-09) + +Full Changelog: [v0.0.1...v0.1.0](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/compare/v0.0.1...v0.1.0) + +### Features + +* add files ([38693c7](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/38693c7bfbbb685cb67bd5608077160cfec2449b)) +* **api:** update via SDK Studio ([f4212bc](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/f4212bc58fa4fc50bcb4e82c309ebe66856403bf)) +* expose response headers for both streams and errors ([933d18a](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/933d18a743ff2fd147d52e515af8be5ef0ed1027)) + + +### Bug Fixes + +* always send `filename=...` for multipart requests where a file is expected ([4b79b41](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/4b79b41fd7efdef8faa16c55f07db428f745e7fe)) +* bump sorbet version and fix new type errors from the breaking change ([5eac2d1](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/5eac2d183e1e82671df34e23af49d79794937575)) +* coroutine leaks from connection pool ([b957278](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/b957278f0a28535d6a95932c4f7d720416b6af5a)) + + +### Performance Improvements + +* faster code formatting ([0690c67](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/0690c675b38d9ce81bb52bf445d03de71a4a8f11)) + + +### Chores + +* add json schema comment for rubocop.yml ([69c97ad](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/69c97add2b0d8b3719eb93ee1efd2941bb2a3013)) +* allow fast-format to use bsd sed as well ([507400e](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/507400ea5992ac44c967814327ff0b3de8f8e2bc)) +* configure new SDK language ([fb2515a](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/fb2515a6c3f6b1a96c9660246a9fac6ff8887826)) +* do not install brew dependencies in ./scripts/bootstrap by default ([196c996](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/196c996000e45d9c3f3fbc8358b6df66ad8c8fce)) +* ignore linter error for tests having large collections ([d17ddcd](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/d17ddcd7235b2e947815ba027ad847ebacd51e52)) +* **internal:** codegen related update 
([8295869](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/8295869cf8aaabcaf9ce705ca847716eadd16205)) +* **internal:** codegen related update ([5056a88](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/5056a880cc116c4dc0e7350471eadfe181e72408)) +* update SDK settings ([feb82d3](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/commit/feb82d35c7ad0a00c2425c19bf642c25572607e2)) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 903e5fe..ec5addf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -43,13 +43,13 @@ If you’d like to use the repository from source, you can either install from g To install via git in your `Gemfile`: ```ruby -gem "scrapegraphai", git: "https://www.github.com/stainless-sdks/scrapegraphai-ruby" +gem "scrapegraphai", git: "https://www.github.com/ScrapeGraphAI/scrapegraphai-ruby" ``` Alternatively, reference local copy of the repo: ```bash -$ git clone -- 'https://www.github.com/stainless-sdks/scrapegraphai-ruby' '' +$ git clone -- 'https://www.github.com/ScrapeGraphAI/scrapegraphai-ruby' '' ``` ```ruby diff --git a/Gemfile.lock b/Gemfile.lock index b4af6d9..1f0dbc7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,13 +11,13 @@ GIT PATH remote: . specs: - scrapegraphai (0.0.1) + scrapegraphai (0.1.0) connection_pool GEM remote: https://rubygems.org/ specs: - activesupport (8.0.2) + activesupport (8.0.2.1) base64 benchmark (>= 0.3) bigdecimal @@ -33,45 +33,50 @@ GEM addressable (2.8.7) public_suffix (>= 2.0.2, < 7.0) ast (2.4.3) - async (2.24.0) + async (2.27.3) console (~> 1.29) fiber-annotation - io-event (~> 1.9) + io-event (~> 1.11) metrics (~> 0.12) traces (~> 0.15) - base64 (0.2.0) - benchmark (0.4.0) - bigdecimal (3.1.9) + base64 (0.3.0) + benchmark (0.4.1) + bigdecimal (3.2.2) concurrent-ruby (1.3.5) connection_pool (2.5.3) - console (1.30.2) + console (1.33.0) fiber-annotation fiber-local (~> 1.1) json crack (1.0.0) bigdecimal rexml - csv (3.3.4) - drb (2.2.1) + csv (3.3.5) + drb (2.2.3) erubi (1.13.1) + ffi (1.17.2-aarch64-linux-gnu) + ffi (1.17.2-aarch64-linux-musl) + ffi (1.17.2-arm64-darwin) + ffi (1.17.2-x86_64-darwin) ffi (1.17.2-x86_64-linux-gnu) + ffi (1.17.2-x86_64-linux-musl) fiber-annotation (0.2.0) fiber-local (1.1.0) fiber-storage fiber-storage (1.0.1) fileutils (1.7.3) - hashdiff (1.1.2) + hashdiff (1.2.0) i18n (1.14.7) concurrent-ruby (~> 1.0) - io-event (1.10.0) - json (2.11.3) - language_server-protocol (3.17.0.4) + io-event (1.11.2) + json (2.13.2) + language_server-protocol (3.17.0.5) lint_roller (1.1.0) listen (3.9.0) rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) logger (1.7.0) - metrics (0.12.2) + metrics (0.13.0) minitest (5.25.5) minitest-focus (1.4.0) minitest (>= 4, < 6) @@ -84,7 +89,7 @@ GEM mutex_m (0.3.0) netrc (0.11.0) parallel (1.27.0) - parser (3.3.8.0) + parser (3.3.9.0) ast (~> 2.4.1) racc prettier_print (1.2.1) @@ -92,20 +97,19 @@ GEM public_suffix (6.0.2) racc (1.8.1) rainbow (3.1.1) - rake (13.2.1) + rake (13.3.0) rb-fsevent (0.11.2) rb-inotify (0.11.1) ffi (~> 1.0) - rbi (0.3.2) + rbi (0.3.6) prism (~> 1.0) rbs (>= 3.4.4) - sorbet-runtime (>= 0.5.9204) - rbs (3.9.2) + rbs (3.9.4) logger redcarpet (3.6.1) - regexp_parser (2.10.0) + regexp_parser (2.11.2) rexml (3.4.1) - rubocop (1.75.5) + rubocop (1.79.2) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -113,25 +117,28 @@ GEM parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 2.9.3, < 3.0) - rubocop-ast (>= 1.44.0, < 2.0) + rubocop-ast (>= 1.46.0, < 2.0) ruby-progressbar 
(~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.44.1) + rubocop-ast (1.46.0) parser (>= 3.3.7.2) prism (~> 1.4) ruby-progressbar (1.13.0) securerandom (0.4.1) - sorbet (0.5.12067) - sorbet-static (= 0.5.12067) - sorbet-runtime (0.5.12067) - sorbet-static (0.5.12067-x86_64-linux) - sorbet-static-and-runtime (0.5.12067) - sorbet (= 0.5.12067) - sorbet-runtime (= 0.5.12067) - spoom (1.6.1) + sorbet (0.5.12424) + sorbet-static (= 0.5.12424) + sorbet-runtime (0.5.12424) + sorbet-static (0.5.12424-aarch64-linux) + sorbet-static (0.5.12424-universal-darwin) + sorbet-static (0.5.12424-x86_64-linux) + sorbet-static-and-runtime (0.5.12424) + sorbet (= 0.5.12424) + sorbet-runtime (= 0.5.12424) + spoom (1.6.3) erubi (>= 1.10.0) prism (>= 0.28.0) - rbi (>= 0.2.3) + rbi (>= 0.3.3) + rexml (>= 3.2.6) sorbet-static-and-runtime (>= 0.5.10187) thor (>= 0.19.2) steep (1.10.0) @@ -152,7 +159,7 @@ GEM terminal-table (>= 2, < 5) uri (>= 0.12.0) strscan (3.1.5) - syntax_tree (6.2.0) + syntax_tree (6.3.0) prettier_print (>= 1.2.0) tapioca (0.16.11) benchmark @@ -166,11 +173,11 @@ GEM yard-sorbet terminal-table (4.0.0) unicode-display_width (>= 1.1.1, < 4) - thor (1.3.2) - traces (0.15.2) + thor (1.4.0) + traces (0.17.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unicode-display_width (3.1.4) + unicode-display_width (3.1.5) unicode-emoji (~> 4.0, >= 4.0.4) unicode-emoji (4.0.4) uri (1.0.3) @@ -185,7 +192,14 @@ GEM yard PLATFORMS - x86_64-linux + aarch64-linux + aarch64-linux-gnu + aarch64-linux-musl + arm64-darwin + universal-darwin + x86_64-darwin + x86_64-linux-gnu + x86_64-linux-musl DEPENDENCIES async diff --git a/README.md b/README.md index 77916c1..2d1c1da 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Scrapegraphai Ruby API library -The Scrapegraphai Ruby library provides convenient access to the Scrapegraphai REST API from any Ruby 3.2.0+ application. It ships with comprehensive types & docstrings in Yard, RBS, and RBI – [see below](https://github.com/stainless-sdks/scrapegraphai-ruby#Sorbet) for usage with Sorbet. The standard library's `net/http` is used as the HTTP transport, with connection pooling via the `connection_pool` gem. +The Scrapegraphai Ruby library provides convenient access to the Scrapegraphai REST API from any Ruby 3.2.0+ application. It ships with comprehensive types & docstrings in Yard, RBS, and RBI – [see below](https://github.com/ScrapeGraphAI/scrapegraphai-ruby#Sorbet) for usage with Sorbet. The standard library's `net/http` is used as the HTTP transport, with connection pooling via the `connection_pool` gem. It is generated with [Stainless](https://www.stainless.com/). @@ -14,10 +14,14 @@ The REST API documentation can be found on [scrapegraphai.com](https://scrapegra To use this gem, install via Bundler by adding the following to your application's `Gemfile`: + + ```ruby -gem "scrapegraphai", "~> 0.0.1" +gem "scrapegraphai", "~> 0.1.0" ``` + + ## Usage ```ruby @@ -235,4 +239,4 @@ Ruby 3.2.0 or higher. ## Contributing -See [the contributing documentation](https://github.com/stainless-sdks/scrapegraphai-ruby/tree/main/CONTRIBUTING.md). +See [the contributing documentation](https://github.com/ScrapeGraphAI/scrapegraphai-ruby/tree/main/CONTRIBUTING.md). 
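Taken together, the README and CONTRIBUTING hunks above leave two supported ways to pull the gem in: the released 0.1.0 from rubygems.org, or the relocated Git source under the ScrapeGraphAI org. A minimal Gemfile sketch combining the two lines shown in this diff (pick one; the Git source is only needed for unreleased changes):

```ruby
source "https://rubygems.org"

# Released gem, as documented in README.md
gem "scrapegraphai", "~> 0.1.0"

# ...or track the repository directly, as documented in CONTRIBUTING.md
# gem "scrapegraphai", git: "https://www.github.com/ScrapeGraphAI/scrapegraphai-ruby"
```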
diff --git a/Rakefile b/Rakefile index 92d310b..ebbcdc5 100644 --- a/Rakefile +++ b/Rakefile @@ -12,6 +12,8 @@ tapioca = "sorbet/tapioca" examples = "examples" ignore_file = ".ignore" +FILES_ENV = "FORMAT_FILE" + CLEAN.push(*%w[.idea/ .ruby-lsp/ .yardoc/ doc/], *FileList["*.gem"], ignore_file) CLOBBER.push(*%w[sorbet/rbi/annotations/ sorbet/rbi/gems/], tapioca) @@ -38,6 +40,14 @@ end xargs = %w[xargs --no-run-if-empty --null --max-procs=0 --max-args=300 --] ruby_opt = {"RUBYOPT" => [ENV["RUBYOPT"], "--encoding=UTF-8"].compact.join(" ")} +filtered = ->(ext, dirs) do + if ENV.key?(FILES_ENV) + %w[sed -E -n -e] << "/\\.#{ext}$/p" << "--" << ENV.fetch(FILES_ENV) + else + (%w[find] + dirs + %w[-type f -and -name]) << "*.#{ext}" << "-print0" + end +end + desc("Lint `*.rb(i)`") multitask(:"lint:rubocop") do find = %w[find ./lib ./test ./rbi ./examples -type f -and ( -name *.rb -or -name *.rbi ) -print0] @@ -52,24 +62,26 @@ multitask(:"lint:rubocop") do sh("#{find.shelljoin} | #{lint.shelljoin}") end +norm_lines = %w[tr -- \n \0].shelljoin + desc("Format `*.rb`") multitask(:"format:rb") do # while `syntax_tree` is much faster than `rubocop`, `rubocop` is the only formatter with full syntax support - find = %w[find ./lib ./test ./examples -type f -and -name *.rb -print0] + files = filtered["rb", %w[./lib ./test ./examples]] fmt = xargs + %w[rubocop --fail-level F --autocorrect --format simple --] - sh("#{find.shelljoin} | #{fmt.shelljoin}") + sh("#{files.shelljoin} | #{norm_lines} | #{fmt.shelljoin}") end desc("Format `*.rbi`") multitask(:"format:rbi") do - find = %w[find ./rbi -type f -and -name *.rbi -print0] + files = filtered["rbi", %w[./rbi]] fmt = xargs + %w[stree write --] - sh(ruby_opt, "#{find.shelljoin} | #{fmt.shelljoin}") + sh(ruby_opt, "#{files.shelljoin} | #{norm_lines} | #{fmt.shelljoin}") end desc("Format `*.rbs`") multitask(:"format:rbs") do - find = %w[find ./sig -type f -name *.rbs -print0] + files = filtered["rbs", %w[./sig]] inplace = /darwin|bsd/ =~ RUBY_PLATFORM ? ["-i", ""] : %w[-i] uuid = SecureRandom.uuid @@ -98,13 +110,13 @@ multitask(:"format:rbs") do success = false # transform class aliases to type aliases, which syntax tree has no trouble with - sh("#{find.shelljoin} | #{pre.shelljoin}") + sh("#{files.shelljoin} | #{norm_lines} | #{pre.shelljoin}") # run syntax tree to format `*.rbs` files - sh(ruby_opt, "#{find.shelljoin} | #{fmt.shelljoin}") do + sh(ruby_opt, "#{files.shelljoin} | #{norm_lines} | #{fmt.shelljoin}") do success = _1 end # transform type aliases back to class aliases - sh("#{find.shelljoin} | #{pst.shelljoin}") + sh("#{files.shelljoin} | #{norm_lines} | #{pst.shelljoin}") # always run post-processing to remove comment marker fail unless success diff --git a/bin/check-release-environment b/bin/check-release-environment new file mode 100644 index 0000000..c05436e --- /dev/null +++ b/bin/check-release-environment @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +errors=() + +if [ -z "${GEM_HOST_API_KEY}" ]; then + errors+=("The GEM_HOST_API_KEY secret has not been set. Please set it in either this repository's secrets or your organization secrets") +fi + +lenErrors=${#errors[@]} + +if [[ lenErrors -gt 0 ]]; then + echo -e "Found the following errors in the release environment:\n" + + for error in "${errors[@]}"; do + echo -e "- $error\n" + done + + exit 1 +fi + +echo "The environment is ready to push releases!" 
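The `filtered` lambda added to the Rakefile above switches the format tasks between the old `find` walk and a `sed`/`tr` pipeline over an explicit FORMAT_FILE list (one path per line, NUL-joined for `xargs --null`). A small Ruby sketch of that filtering step, for illustration only — the method and variable names here are made up and not part of the Rakefile:

```ruby
# Keep only paths with the requested extension from a newline-delimited list,
# then join them NUL-separated, the way the `xargs --null` pipelines expect.
def filtered_paths(list_file, ext)
  File.readlines(list_file, chomp: true)
      .select { |path| path.end_with?(".#{ext}") }
      .join("\0")
end

# e.g. filtered_paths(ENV.fetch("FORMAT_FILE"), "rb")
```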
diff --git a/lib/scrapegraphai/errors.rb b/lib/scrapegraphai/errors.rb index cd320b9..05d0547 100644 --- a/lib/scrapegraphai/errors.rb +++ b/lib/scrapegraphai/errors.rb @@ -40,6 +40,9 @@ class APIError < Scrapegraphai::Errors::Error # @return [Integer, nil] attr_accessor :status + # @return [Hash{String=>String}, nil] + attr_accessor :headers + # @return [Object, nil] attr_accessor :body @@ -47,13 +50,15 @@ class APIError < Scrapegraphai::Errors::Error # # @param url [URI::Generic] # @param status [Integer, nil] + # @param headers [Hash{String=>String}, nil] # @param body [Object, nil] # @param request [nil] # @param response [nil] # @param message [String, nil] - def initialize(url:, status: nil, body: nil, request: nil, response: nil, message: nil) + def initialize(url:, status: nil, headers: nil, body: nil, request: nil, response: nil, message: nil) @url = url @status = status + @headers = headers @body = body @request = request @response = response @@ -74,6 +79,7 @@ class APIConnectionError < Scrapegraphai::Errors::APIError # # @param url [URI::Generic] # @param status [nil] + # @param headers [Hash{String=>String}, nil] # @param body [nil] # @param request [nil] # @param response [nil] @@ -81,6 +87,7 @@ class APIConnectionError < Scrapegraphai::Errors::APIError def initialize( url:, status: nil, + headers: nil, body: nil, request: nil, response: nil, @@ -95,6 +102,7 @@ class APITimeoutError < Scrapegraphai::Errors::APIConnectionError # # @param url [URI::Generic] # @param status [nil] + # @param headers [Hash{String=>String}, nil] # @param body [nil] # @param request [nil] # @param response [nil] @@ -102,6 +110,7 @@ class APITimeoutError < Scrapegraphai::Errors::APIConnectionError def initialize( url:, status: nil, + headers: nil, body: nil, request: nil, response: nil, @@ -116,21 +125,24 @@ class APIStatusError < Scrapegraphai::Errors::APIError # # @param url [URI::Generic] # @param status [Integer] + # @param headers [Hash{String=>String}, nil] # @param body [Object, nil] # @param request [nil] # @param response [nil] # @param message [String, nil] # # @return [self] - def self.for(url:, status:, body:, request:, response:, message: nil) - kwargs = { - url: url, - status: status, - body: body, - request: request, - response: response, - message: message - } + def self.for(url:, status:, headers:, body:, request:, response:, message: nil) + kwargs = + { + url: url, + status: status, + headers: headers, + body: body, + request: request, + response: response, + message: message + } case status in 400 @@ -162,15 +174,17 @@ def self.for(url:, status:, body:, request:, response:, message: nil) # # @param url [URI::Generic] # @param status [Integer] + # @param headers [Hash{String=>String}, nil] # @param body [Object, nil] # @param request [nil] # @param response [nil] # @param message [String, nil] - def initialize(url:, status:, body:, request:, response:, message: nil) + def initialize(url:, status:, headers:, body:, request:, response:, message: nil) message ||= {url: url.to_s, status: status, body: body} super( url: url, status: status, + headers: headers, body: body, request: request, response: response, diff --git a/lib/scrapegraphai/file_part.rb b/lib/scrapegraphai/file_part.rb index 7028962..1f8949f 100644 --- a/lib/scrapegraphai/file_part.rb +++ b/lib/scrapegraphai/file_part.rb @@ -38,18 +38,21 @@ def to_json(*a) = read.to_json(*a) def to_yaml(*a) = read.to_yaml(*a) # @param content [Pathname, StringIO, IO, String] - # @param filename [String, nil] + # @param filename [Pathname, 
String, nil] # @param content_type [String, nil] def initialize(content, filename: nil, content_type: nil) - @content = content + @content_type = content_type @filename = - case content - in Pathname - filename.nil? ? content.basename.to_path : ::File.basename(filename) + case [filename, (@content = content)] + in [String | Pathname, _] + ::File.basename(filename) + in [nil, Pathname] + content.basename.to_path + in [nil, IO] + content.to_path else - filename.nil? ? nil : ::File.basename(filename) + filename end - @content_type = content_type end end end diff --git a/lib/scrapegraphai/internal/transport/base_client.rb b/lib/scrapegraphai/internal/transport/base_client.rb index 626eb9a..e889550 100644 --- a/lib/scrapegraphai/internal/transport/base_client.rb +++ b/lib/scrapegraphai/internal/transport/base_client.rb @@ -47,7 +47,7 @@ def validate!(req) # @api private # # @param status [Integer] - # @param headers [Hash{String=>String}, Net::HTTPHeader] + # @param headers [Hash{String=>String}] # # @return [Boolean] def should_retry?(status, headers:) @@ -85,7 +85,7 @@ def should_retry?(status, headers:) # # @param status [Integer] # - # @param response_headers [Hash{String=>String}, Net::HTTPHeader] + # @param response_headers [Hash{String=>String}] # # @return [Hash{Symbol=>Object}] def follow_redirect(request, status:, response_headers:) @@ -367,10 +367,7 @@ def initialize( # @return [Array(Integer, Net::HTTPResponse, Enumerable)] def send_request(request, redirect_count:, retry_count:, send_retry_header:) url, headers, max_retries, timeout = request.fetch_values(:url, :headers, :max_retries, :timeout) - input = { - **request.except(:timeout), - deadline: Scrapegraphai::Internal::Util.monotonic_secs + timeout - } + input = {**request.except(:timeout), deadline: Scrapegraphai::Internal::Util.monotonic_secs + timeout} if send_retry_header headers["x-stainless-retry-count"] = retry_count.to_s @@ -381,6 +378,7 @@ def send_request(request, redirect_count:, retry_count:, send_retry_header:) rescue Scrapegraphai::Errors::APIConnectionError => e status = e end + headers = Scrapegraphai::Internal::Util.normalized_headers(response&.each_header&.to_h) case status in ..299 @@ -397,7 +395,7 @@ def send_request(request, redirect_count:, retry_count:, send_retry_header:) in 300..399 self.class.reap_connection!(status, stream: stream) - request = self.class.follow_redirect(request, status: status, response_headers: response) + request = self.class.follow_redirect(request, status: status, response_headers: headers) send_request( request, redirect_count: redirect_count + 1, @@ -406,9 +404,9 @@ def send_request(request, redirect_count:, retry_count:, send_retry_header:) ) in Scrapegraphai::Errors::APIConnectionError if retry_count >= max_retries raise status - in (400..) if retry_count >= max_retries || !self.class.should_retry?(status, headers: response) + in (400..) 
if retry_count >= max_retries || !self.class.should_retry?(status, headers: headers) decoded = Kernel.then do - Scrapegraphai::Internal::Util.decode_content(response, stream: stream, suppress_error: true) + Scrapegraphai::Internal::Util.decode_content(headers, stream: stream, suppress_error: true) ensure self.class.reap_connection!(status, stream: stream) end @@ -416,6 +414,7 @@ def send_request(request, redirect_count:, retry_count:, send_retry_header:) raise Scrapegraphai::Errors::APIStatusError.for( url: url, status: status, + headers: headers, body: decoded, request: nil, response: response @@ -492,19 +491,21 @@ def request(req) send_retry_header: send_retry_header ) - decoded = Scrapegraphai::Internal::Util.decode_content(response, stream: stream) + headers = Scrapegraphai::Internal::Util.normalized_headers(response.each_header.to_h) + decoded = Scrapegraphai::Internal::Util.decode_content(headers, stream: stream) case req in {stream: Class => st} st.new( model: model, url: url, status: status, + headers: headers, response: response, unwrap: unwrap, stream: decoded ) in {page: Class => page} - page.new(client: self, req: req, headers: response, page_data: decoded) + page.new(client: self, req: req, headers: headers, page_data: decoded) else unwrapped = Scrapegraphai::Internal::Util.dig(decoded, unwrap) Scrapegraphai::Internal::Type::Converter.coerce(model, unwrapped) diff --git a/lib/scrapegraphai/internal/transport/pooled_net_requester.rb b/lib/scrapegraphai/internal/transport/pooled_net_requester.rb index 02576a8..2968d6a 100644 --- a/lib/scrapegraphai/internal/transport/pooled_net_requester.rb +++ b/lib/scrapegraphai/internal/transport/pooled_net_requester.rb @@ -134,9 +134,9 @@ def execute(request) # rubocop:disable Metrics/BlockLength enum = Enumerator.new do |y| - with_pool(url, deadline: deadline) do |conn| - next if finished + next if finished + with_pool(url, deadline: deadline) do |conn| req, closing = self.class.build_request(request) do self.class.calibrate_socket_timeout(conn, deadline) end @@ -149,7 +149,7 @@ def execute(request) self.class.calibrate_socket_timeout(conn, deadline) conn.request(req) do |rsp| - y << [conn, req, rsp] + y << [req, rsp] break if finished rsp.read_body do |bytes| @@ -160,6 +160,8 @@ def execute(request) end eof = true end + ensure + conn.finish if !eof && conn&.started? end rescue Timeout::Error raise Scrapegraphai::Errors::APITimeoutError.new(url: url, request: req) @@ -168,16 +170,11 @@ def execute(request) end # rubocop:enable Metrics/BlockLength - conn, _, response = enum.next + _, response = enum.next body = Scrapegraphai::Internal::Util.fused_enum(enum, external: true) do finished = true - tap do - enum.next - rescue StopIteration - nil - end + loop { enum.next } ensure - conn.finish if !eof && conn&.started? 
closing&.call end [Integer(response.code), response, body] @@ -193,15 +190,7 @@ def initialize(size: self.class::DEFAULT_MAX_CONNECTIONS) end define_sorbet_constant!(:Request) do - T.type_alias do - { - method: Symbol, - url: URI::Generic, - headers: T::Hash[String, String], - body: T.anything, - deadline: Float - } - end + T.type_alias { {method: Symbol, url: URI::Generic, headers: T::Hash[String, String], body: T.anything, deadline: Float} } end end end diff --git a/lib/scrapegraphai/internal/type/base_model.rb b/lib/scrapegraphai/internal/type/base_model.rb index 3206b81..7d0c770 100644 --- a/lib/scrapegraphai/internal/type/base_model.rb +++ b/lib/scrapegraphai/internal/type/base_model.rb @@ -64,14 +64,7 @@ def fields setter = :"#{name_sym}=" api_name = info.fetch(:api_name, name_sym) nilable = info.fetch(:nil?, false) - const = if required && !nilable - info.fetch( - :const, - Scrapegraphai::Internal::OMIT - ) - else - Scrapegraphai::Internal::OMIT - end + const = required && !nilable ? info.fetch(:const, Scrapegraphai::Internal::OMIT) : Scrapegraphai::Internal::OMIT [name_sym, setter].each { undef_method(_1) } if known_fields.key?(name_sym) diff --git a/lib/scrapegraphai/internal/type/base_page.rb b/lib/scrapegraphai/internal/type/base_page.rb index 9068275..dcfdaa4 100644 --- a/lib/scrapegraphai/internal/type/base_page.rb +++ b/lib/scrapegraphai/internal/type/base_page.rb @@ -39,7 +39,7 @@ def to_enum = super(:auto_paging_each) # # @param client [Scrapegraphai::Internal::Transport::BaseClient] # @param req [Hash{Symbol=>Object}] - # @param headers [Hash{String=>String}, Net::HTTPHeader] + # @param headers [Hash{String=>String}] # @param page_data [Object] def initialize(client:, req:, headers:, page_data:) @client = client diff --git a/lib/scrapegraphai/internal/type/file_input.rb b/lib/scrapegraphai/internal/type/file_input.rb index 64dc9f8..312923b 100644 --- a/lib/scrapegraphai/internal/type/file_input.rb +++ b/lib/scrapegraphai/internal/type/file_input.rb @@ -82,17 +82,20 @@ def coerce(value, state:) # # @return [Pathname, StringIO, IO, String, Object] def dump(value, state:) - # rubocop:disable Lint/DuplicateBranch case value + in StringIO | String + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 + # while not required, a filename is recommended, and in practice many servers do expect this + Scrapegraphai::FilePart.new(value, filename: "upload") in IO state[:can_retry] = false + value.to_path.nil? ? Scrapegraphai::FilePart.new(value, filename: "upload") : value in Scrapegraphai::FilePart if value.content.is_a?(IO) state[:can_retry] = false + value else + value end - # rubocop:enable Lint/DuplicateBranch - - value end # @api private diff --git a/lib/scrapegraphai/internal/util.rb b/lib/scrapegraphai/internal/util.rb index 6e32e7b..91e588e 100644 --- a/lib/scrapegraphai/internal/util.rb +++ b/lib/scrapegraphai/internal/util.rb @@ -244,7 +244,7 @@ class << self # # @return [String] def uri_origin(uri) - "#{uri.scheme}://#{uri.host}#{uri.port == uri.default_port ? 
'' : ":#{uri.port}"}" + "#{uri.scheme}://#{uri.host}#{":#{uri.port}" unless uri.port == uri.default_port}" end # @api private @@ -566,7 +566,8 @@ class << self # # @return [Array(String, Enumerable)] private def encode_multipart_streaming(body) - boundary = SecureRandom.urlsafe_base64(60) + # RFC 1521 Section 7.2.1 says we should have 70 char maximum for boundary length + boundary = SecureRandom.urlsafe_base64(46) closing = [] strio = writable_enum do |y| @@ -647,7 +648,7 @@ def force_charset!(content_type, text:) # # Assumes each chunk in stream has `Encoding::BINARY`. # - # @param headers [Hash{String=>String}, Net::HTTPHeader] + # @param headers [Hash{String=>String}] # @param stream [Enumerable] # @param suppress_error [Boolean] # diff --git a/lib/scrapegraphai/resources/generate_schema.rb b/lib/scrapegraphai/resources/generate_schema.rb index 252ced9..ba5eadc 100644 --- a/lib/scrapegraphai/resources/generate_schema.rb +++ b/lib/scrapegraphai/resources/generate_schema.rb @@ -32,7 +32,8 @@ def create(params) # # @overload retrieve(request_id, request_options: {}) # - # @param request_id [String] + # @param request_id [String] Unique request identifier + # # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] # # @return [Scrapegraphai::Models::GenerateSchemaRetrieveResponse::CompletedSchemaGenerationResponse, Scrapegraphai::Models::GenerateSchemaRetrieveResponse::FailedSchemaGenerationResponse] diff --git a/lib/scrapegraphai/resources/searchscraper.rb b/lib/scrapegraphai/resources/searchscraper.rb index fc5c733..978ba59 100644 --- a/lib/scrapegraphai/resources/searchscraper.rb +++ b/lib/scrapegraphai/resources/searchscraper.rb @@ -37,7 +37,8 @@ def create(params) # # @overload retrieve_status(request_id, request_options: {}) # - # @param request_id [String] + # @param request_id [String] Unique request identifier + # # @param request_options [Scrapegraphai::RequestOptions, Hash{Symbol=>Object}, nil] # # @return [Scrapegraphai::Models::CompletedSearchScraper, Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::FailedSearchScraperResponse] diff --git a/lib/scrapegraphai/version.rb b/lib/scrapegraphai/version.rb index 2ef4d62..63bd8af 100644 --- a/lib/scrapegraphai/version.rb +++ b/lib/scrapegraphai/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Scrapegraphai - VERSION = "0.0.1" + VERSION = "0.1.0" end diff --git a/rbi/scrapegraphai/errors.rbi b/rbi/scrapegraphai/errors.rbi index 6cece32..55df99d 100644 --- a/rbi/scrapegraphai/errors.rbi +++ b/rbi/scrapegraphai/errors.rbi @@ -33,6 +33,9 @@ module Scrapegraphai sig { returns(T.nilable(Integer)) } attr_accessor :status + sig { returns(T.nilable(T::Hash[String, String])) } + attr_accessor :headers + sig { returns(T.nilable(T.anything)) } attr_accessor :body @@ -41,6 +44,7 @@ module Scrapegraphai params( url: URI::Generic, status: T.nilable(Integer), + headers: T.nilable(T::Hash[String, String]), body: T.nilable(Object), request: NilClass, response: NilClass, @@ -50,6 +54,7 @@ module Scrapegraphai def self.new( url:, status: nil, + headers: nil, body: nil, request: nil, response: nil, @@ -59,10 +64,10 @@ module Scrapegraphai end class APIConnectionError < Scrapegraphai::Errors::APIError - sig { void } + sig { returns(NilClass) } attr_accessor :status - sig { void } + sig { returns(NilClass) } attr_accessor :body # @api private @@ -70,6 +75,7 @@ module Scrapegraphai params( url: URI::Generic, status: NilClass, + headers: T.nilable(T::Hash[String, String]), body: NilClass, request: NilClass, 
response: NilClass, @@ -79,6 +85,7 @@ module Scrapegraphai def self.new( url:, status: nil, + headers: nil, body: nil, request: nil, response: nil, @@ -93,6 +100,7 @@ module Scrapegraphai params( url: URI::Generic, status: NilClass, + headers: T.nilable(T::Hash[String, String]), body: NilClass, request: NilClass, response: NilClass, @@ -102,6 +110,7 @@ module Scrapegraphai def self.new( url:, status: nil, + headers: nil, body: nil, request: nil, response: nil, @@ -116,13 +125,22 @@ module Scrapegraphai params( url: URI::Generic, status: Integer, + headers: T.nilable(T::Hash[String, String]), body: T.nilable(Object), request: NilClass, response: NilClass, message: T.nilable(String) ).returns(T.attached_class) end - def self.for(url:, status:, body:, request:, response:, message: nil) + def self.for( + url:, + status:, + headers:, + body:, + request:, + response:, + message: nil + ) end sig { returns(Integer) } @@ -133,13 +151,22 @@ module Scrapegraphai params( url: URI::Generic, status: Integer, + headers: T.nilable(T::Hash[String, String]), body: T.nilable(Object), request: NilClass, response: NilClass, message: T.nilable(String) ).returns(T.attached_class) end - def self.new(url:, status:, body:, request:, response:, message: nil) + def self.new( + url:, + status:, + headers:, + body:, + request:, + response:, + message: nil + ) end end diff --git a/rbi/scrapegraphai/file_part.rbi b/rbi/scrapegraphai/file_part.rbi index 9c2014c..e8b3a4f 100644 --- a/rbi/scrapegraphai/file_part.rbi +++ b/rbi/scrapegraphai/file_part.rbi @@ -27,7 +27,7 @@ module Scrapegraphai sig do params( content: T.any(Pathname, StringIO, IO, String), - filename: T.nilable(String), + filename: T.nilable(T.any(Pathname, String)), content_type: T.nilable(String) ).returns(T.attached_class) end diff --git a/rbi/scrapegraphai/internal/transport/base_client.rbi b/rbi/scrapegraphai/internal/transport/base_client.rbi index 4202240..f3505fa 100644 --- a/rbi/scrapegraphai/internal/transport/base_client.rbi +++ b/rbi/scrapegraphai/internal/transport/base_client.rbi @@ -85,10 +85,9 @@ module Scrapegraphai # @api private sig do - params( - status: Integer, - headers: T.any(T::Hash[String, String], Net::HTTPHeader) - ).returns(T::Boolean) + params(status: Integer, headers: T::Hash[String, String]).returns( + T::Boolean + ) end def should_retry?(status, headers:) end @@ -99,7 +98,7 @@ module Scrapegraphai request: Scrapegraphai::Internal::Transport::BaseClient::RequestInput, status: Integer, - response_headers: T.any(T::Hash[String, String], Net::HTTPHeader) + response_headers: T::Hash[String, String] ).returns( Scrapegraphai::Internal::Transport::BaseClient::RequestInput ) diff --git a/rbi/scrapegraphai/internal/type/base_page.rbi b/rbi/scrapegraphai/internal/type/base_page.rbi index 8e1ce1d..d31316d 100644 --- a/rbi/scrapegraphai/internal/type/base_page.rbi +++ b/rbi/scrapegraphai/internal/type/base_page.rbi @@ -31,7 +31,7 @@ module Scrapegraphai client: Scrapegraphai::Internal::Transport::BaseClient, req: Scrapegraphai::Internal::Transport::BaseClient::RequestComponents, - headers: T.any(T::Hash[String, String], Net::HTTPHeader), + headers: T::Hash[String, String], page_data: T.anything ).void end diff --git a/rbi/scrapegraphai/internal/util.rbi b/rbi/scrapegraphai/internal/util.rbi index b47d2b5..a023c10 100644 --- a/rbi/scrapegraphai/internal/util.rbi +++ b/rbi/scrapegraphai/internal/util.rbi @@ -361,7 +361,7 @@ module Scrapegraphai # Assumes each chunk in stream has `Encoding::BINARY`. 
sig do params( - headers: T.any(T::Hash[String, String], Net::HTTPHeader), + headers: T::Hash[String, String], stream: T::Enumerable[String], suppress_error: T::Boolean ).returns(T.anything) diff --git a/rbi/scrapegraphai/resources/generate_schema.rbi b/rbi/scrapegraphai/resources/generate_schema.rbi index 21d8482..96833ea 100644 --- a/rbi/scrapegraphai/resources/generate_schema.rbi +++ b/rbi/scrapegraphai/resources/generate_schema.rbi @@ -30,7 +30,11 @@ module Scrapegraphai Scrapegraphai::Models::GenerateSchemaRetrieveResponse::Variants ) end - def retrieve(request_id, request_options: {}) + def retrieve( + # Unique request identifier + request_id, + request_options: {} + ) end # @api private diff --git a/rbi/scrapegraphai/resources/searchscraper.rbi b/rbi/scrapegraphai/resources/searchscraper.rbi index 285fa85..05c53ff 100644 --- a/rbi/scrapegraphai/resources/searchscraper.rbi +++ b/rbi/scrapegraphai/resources/searchscraper.rbi @@ -36,7 +36,11 @@ module Scrapegraphai Scrapegraphai::Models::SearchscraperRetrieveStatusResponse::Variants ) end - def retrieve_status(request_id, request_options: {}) + def retrieve_status( + # Unique request identifier + request_id, + request_options: {} + ) end # @api private diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 0000000..3f5720a --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,70 @@ +{ + "packages": { + ".": {} + }, + "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json", + "include-v-in-tag": true, + "include-component-in-tag": false, + "versioning": "prerelease", + "prerelease": true, + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "pull-request-header": "Automated Release PR", + "pull-request-title-pattern": "release: ${version}", + "changelog-sections": [ + { + "type": "feat", + "section": "Features" + }, + { + "type": "fix", + "section": "Bug Fixes" + }, + { + "type": "perf", + "section": "Performance Improvements" + }, + { + "type": "revert", + "section": "Reverts" + }, + { + "type": "chore", + "section": "Chores" + }, + { + "type": "docs", + "section": "Documentation" + }, + { + "type": "style", + "section": "Styles" + }, + { + "type": "refactor", + "section": "Refactors" + }, + { + "type": "test", + "section": "Tests", + "hidden": true + }, + { + "type": "build", + "section": "Build System" + }, + { + "type": "ci", + "section": "Continuous Integration", + "hidden": true + } + ], + "release-type": "ruby", + "version-file": "lib/scrapegraphai/version.rb", + "extra-files": [ + { + "type": "ruby-readme", + "path": "README.md" + } + ] +} \ No newline at end of file diff --git a/scrapegraphai.gemspec b/scrapegraphai.gemspec index 9743167..e15938e 100644 --- a/scrapegraphai.gemspec +++ b/scrapegraphai.gemspec @@ -10,7 +10,7 @@ Gem::Specification.new do |s| s.email = "" s.homepage = "https://gemdocs.org/gems/scrapegraphai" s.metadata["homepage_uri"] = s.homepage - s.metadata["source_code_uri"] = "https://github.com/stainless-sdks/scrapegraphai-ruby" + s.metadata["source_code_uri"] = "https://github.com/ScrapeGraphAI/scrapegraphai-ruby" s.metadata["rubygems_mfa_required"] = false.to_s s.required_ruby_version = ">= 3.2.0" diff --git a/scripts/bootstrap b/scripts/bootstrap index cc31aa8..3487864 100755 --- a/scripts/bootstrap +++ b/scripts/bootstrap @@ -4,10 +4,18 @@ set -e cd -- "$(dirname -- "$0")/.." 
-if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "$SKIP_BREW" != "1" ]; then +if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ] && [ "$SKIP_BREW" != "1" ] && [ -t 0 ]; then brew bundle check >/dev/null 2>&1 || { - echo "==> Installing Homebrew dependencies…" - brew bundle + echo -n "==> Install Homebrew dependencies? (y/N): " + read -r response + case "$response" in + [yY][eE][sS]|[yY]) + brew bundle + ;; + *) + ;; + esac + echo } fi diff --git a/scripts/fast-format b/scripts/fast-format new file mode 100755 index 0000000..6d5973f --- /dev/null +++ b/scripts/fast-format @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -euo pipefail + +echo "Script started with $# arguments" +echo "Arguments: $*" +echo "Script location: $(dirname "$0")" + +cd -- "$(dirname "$0")/.." +echo "Changed to directory: $PWD" + +if [ $# -eq 0 ]; then + echo "Usage: $0 [additional-formatter-args...]" + echo "The file should contain one file path per line" + exit 1 +fi + +exec -- bundle exec rake format FORMAT_FILE="$1" diff --git a/sig/scrapegraphai/errors.rbs b/sig/scrapegraphai/errors.rbs index 7fe0bfb..c0afc3e 100644 --- a/sig/scrapegraphai/errors.rbs +++ b/sig/scrapegraphai/errors.rbs @@ -21,11 +21,14 @@ module Scrapegraphai attr_accessor status: Integer? + attr_accessor headers: ::Hash[String, String]? + attr_accessor body: top? def initialize: ( url: URI::Generic, ?status: Integer?, + ?headers: ::Hash[String, String]?, ?body: Object?, ?request: nil, ?response: nil, @@ -37,6 +40,7 @@ module Scrapegraphai def initialize: ( url: URI::Generic, ?status: nil, + ?headers: ::Hash[String, String]?, ?body: nil, ?request: nil, ?response: nil, @@ -48,6 +52,7 @@ module Scrapegraphai def initialize: ( url: URI::Generic, ?status: nil, + ?headers: ::Hash[String, String]?, ?body: nil, ?request: nil, ?response: nil, @@ -59,6 +64,7 @@ module Scrapegraphai def self.for: ( url: URI::Generic, status: Integer, + headers: ::Hash[String, String]?, body: Object?, request: nil, response: nil, @@ -68,6 +74,7 @@ module Scrapegraphai def initialize: ( url: URI::Generic, status: Integer, + headers: ::Hash[String, String]?, body: Object?, request: nil, response: nil, diff --git a/sig/scrapegraphai/file_part.rbs b/sig/scrapegraphai/file_part.rbs index 0514bba..cc19b4e 100644 --- a/sig/scrapegraphai/file_part.rbs +++ b/sig/scrapegraphai/file_part.rbs @@ -14,7 +14,7 @@ module Scrapegraphai def initialize: ( Pathname | StringIO | IO | String content, - ?filename: String?, + ?filename: (Pathname | String)?, ?content_type: String? 
) -> void end diff --git a/test/scrapegraphai/internal/type/base_model_test.rb b/test/scrapegraphai/internal/type/base_model_test.rb index 81d9fdb..1b1e591 100644 --- a/test/scrapegraphai/internal/type/base_model_test.rb +++ b/test/scrapegraphai/internal/type/base_model_test.rb @@ -157,6 +157,7 @@ def test_dump_retry class Scrapegraphai::Test::EnumModelTest < Minitest::Test class E0 include Scrapegraphai::Internal::Type::Enum + attr_reader :values def initialize(*values) = (@values = values) @@ -476,6 +477,7 @@ def initialize(*variants) = variants.each { variant(_1) } module U1 extend Scrapegraphai::Internal::Type::Union + variant const: :a variant const: 2 end @@ -492,6 +494,7 @@ class M2 < Scrapegraphai::Internal::Type::BaseModel module U2 extend Scrapegraphai::Internal::Type::Union + discriminator :type variant :a, M1 @@ -500,6 +503,7 @@ module U2 module U3 extend Scrapegraphai::Internal::Type::Union + discriminator :type variant :a, M1 @@ -508,6 +512,7 @@ module U3 module U4 extend Scrapegraphai::Internal::Type::Union + discriminator :type variant String @@ -601,6 +606,7 @@ def test_coerce class Scrapegraphai::Test::BaseModelQoLTest < Minitest::Test class E0 include Scrapegraphai::Internal::Type::Enum + attr_reader :values def initialize(*values) = (@values = values) diff --git a/test/scrapegraphai/internal/util_test.rb b/test/scrapegraphai/internal/util_test.rb index 597e1aa..7e15816 100644 --- a/test/scrapegraphai/internal/util_test.rb +++ b/test/scrapegraphai/internal/util_test.rb @@ -213,22 +213,38 @@ def env_table end end + def test_encoding_length + headers, = Scrapegraphai::Internal::Util.encode_content( + {"content-type" => "multipart/form-data"}, + Pathname(__FILE__) + ) + assert_pattern do + headers.fetch("content-type") => /boundary=(.+)$/ + end + field, = Regexp.last_match.captures + assert(field.length < 70 - 6) + end + def test_file_encode file = Pathname(__FILE__) + fileinput = Scrapegraphai::Internal::Type::Converter.dump(Scrapegraphai::Internal::Type::FileInput, "abc") headers = {"content-type" => "multipart/form-data"} cases = { - "abc" => "abc", - StringIO.new("abc") => "abc", - Scrapegraphai::FilePart.new("abc") => "abc", - Scrapegraphai::FilePart.new(StringIO.new("abc")) => "abc", - file => /^class Scrapegraphai/, - Scrapegraphai::FilePart.new(file) => /^class Scrapegraphai/ + "abc" => ["", "abc"], + StringIO.new("abc") => ["", "abc"], + fileinput => %w[upload abc], + Scrapegraphai::FilePart.new(StringIO.new("abc")) => ["", "abc"], + file => [file.basename.to_path, /^class Scrapegraphai/], + Scrapegraphai::FilePart.new(file, filename: "d o g") => ["d%20o%20g", /^class Scrapegraphai/] } - cases.each do |body, val| + cases.each do |body, testcase| + filename, val = testcase encoded = Scrapegraphai::Internal::Util.encode_content(headers, body) cgi = FakeCGI.new(*encoded) + io = cgi[""] assert_pattern do - cgi[""].read => ^val + io.original_filename => ^filename + io.read => ^val end end end @@ -242,18 +258,21 @@ def test_hash_encode {strio: StringIO.new("a")} => {"strio" => "a"}, {strio: Scrapegraphai::FilePart.new("a")} => {"strio" => "a"}, {pathname: Pathname(__FILE__)} => {"pathname" => -> { _1.read in /^class Scrapegraphai/ }}, - {pathname: Scrapegraphai::FilePart.new(Pathname(__FILE__))} => { - "pathname" => -> { - _1.read in /^class Scrapegraphai/ - } - } + {pathname: Scrapegraphai::FilePart.new(Pathname(__FILE__))} => {"pathname" => -> { _1.read in /^class Scrapegraphai/ }} } cases.each do |body, testcase| encoded = 
Scrapegraphai::Internal::Util.encode_content(headers, body) cgi = FakeCGI.new(*encoded) testcase.each do |key, val| assert_pattern do - cgi[key] => ^val + parsed = + case (p = cgi[key]) + in StringIO + p.read + else + p + end + parsed => ^val end end end @@ -291,6 +310,31 @@ def test_copy_write end class Scrapegraphai::Test::UtilFusedEnumTest < Minitest::Test + def test_rewind_closing + touched = false + once = 0 + steps = 0 + enum = Enumerator.new do |y| + next if touched + + 10.times do + steps = _1 + y << _1 + end + ensure + once = once.succ + end + + fused = Scrapegraphai::Internal::Util.fused_enum(enum, external: true) do + touched = true + loop { enum.next } + end + Scrapegraphai::Internal::Util.close_fused!(fused) + + assert_equal(1, once) + assert_equal(0, steps) + end + def test_closing arr = [1, 2, 3] once = 0 @@ -324,9 +368,9 @@ def test_rewind_chain end def test_external_iteration - it = [1, 2, 3].to_enum - first = it.next - fused = Scrapegraphai::Internal::Util.fused_enum(it, external: true) + iter = [1, 2, 3].to_enum + first = iter.next + fused = Scrapegraphai::Internal::Util.fused_enum(iter, external: true) assert_equal(1, first) assert_equal([2, 3], fused.to_a)
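One user-visible effect of the error-handling hunks earlier in this diff (`lib/scrapegraphai/errors.rb` and `base_client.rb`) is that raised status errors now carry the normalized response headers. A hedged sketch of reading them — the client constructor and resource accessor below are assumptions for illustration, not confirmed by this diff; only `status`, `headers`, and `body` come from the errors.rb change:

```ruby
require "scrapegraphai"

client = Scrapegraphai::Client.new # assumed constructor; see the README's Usage section

begin
  client.searchscraper.retrieve_status("request-id") # assumed resource accessor
rescue Scrapegraphai::Errors::APIStatusError => e
  e.status  # Integer HTTP status
  e.headers # Hash{String=>String} of response headers (new in this diff)
  e.body    # decoded response body, if any
end
```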